ARM64: initial backend import

This adds a second implementation of the AArch64 architecture to LLVM,
accessible in parallel via the "arm64" triple. The plan over the
coming weeks & months is to merge the two into a single backend,
during which time thorough code review should naturally occur.

Everything will be easier with the target in-tree though, hence this
commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205090 91177308-0d34-0410-b5e6-96231b3b80d8
Tim Northover 2014-03-29 10:18:08 +00:00
parent 69bd9577fc
commit 7b837d8c75
394 changed files with 105888 additions and 32 deletions


@ -128,6 +128,7 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name
set(LLVM_ALL_TARGETS
AArch64
ARM64
ARM
CppBackend
Hexagon
@ -143,7 +144,7 @@ set(LLVM_ALL_TARGETS
)
# List of targets with JIT support:
set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ)
set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM64 ARM Mips SystemZ)
set(LLVM_TARGETS_TO_BUILD "all"
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")


@ -419,6 +419,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm64*-*) llvm_cv_target_arch="ARM64" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
@ -454,6 +455,7 @@ case $host in
amd64-* | x86_64-*) host_arch="x86_64" ;;
sparc*-*) host_arch="Sparc" ;;
powerpc*-*) host_arch="PowerPC" ;;
arm64*-*) host_arch="ARM64" ;;
arm*-*) host_arch="ARM" ;;
aarch64*-*) host_arch="AArch64" ;;
mips-* | mips64-*) host_arch="Mips" ;;
@ -795,7 +797,7 @@ else
esac
fi
TARGETS_WITH_JIT="AArch64 ARM Mips PowerPC SystemZ X86"
TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT)
dnl Allow enablement of building and installing docs
@ -948,14 +950,14 @@ if test "$llvm_cv_enable_crash_overrides" = "yes" ; then
fi
dnl List all possible targets
ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
AC_SUBST(ALL_TARGETS,$ALL_TARGETS)
dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
host, x86, x86_64, sparc, powerpc, arm64, arm, aarch64, mips, hexagon,
xcore, msp430, nvptx, systemz, r600, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
@ -970,6 +972,7 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;


@ -366,6 +366,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
set(LLVM_NATIVE_ARCH PowerPC)
elseif (LLVM_NATIVE_ARCH MATCHES "aarch64")
set(LLVM_NATIVE_ARCH AArch64)
elseif (LLVM_NATIVE_ARCH MATCHES "arm64")
set(LLVM_NATIVE_ARCH ARM64)
elseif (LLVM_NATIVE_ARCH MATCHES "arm")
set(LLVM_NATIVE_ARCH ARM)
elseif (LLVM_NATIVE_ARCH MATCHES "mips")

configure

@ -1447,9 +1447,9 @@ Optional Features:
Enable crash handling overrides (default is YES)
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
xcore, msp430, nvptx, systemz, r600, and cpp
(default=all)
x86_64, sparc, powerpc, arm64, arm, aarch64, mips,
hexagon, xcore, msp430, nvptx, systemz, r600, and
cpp (default=all)
--enable-experimental-targets
Build experimental host targets: disable or
target1,target2,... (default=disable)
@ -4151,6 +4151,7 @@ else
amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm64*-*) llvm_cv_target_arch="ARM64" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
@ -4187,6 +4188,7 @@ case $host in
amd64-* | x86_64-*) host_arch="x86_64" ;;
sparc*-*) host_arch="Sparc" ;;
powerpc*-*) host_arch="PowerPC" ;;
arm64*-*) host_arch="ARM64" ;;
arm*-*) host_arch="ARM" ;;
aarch64*-*) host_arch="AArch64" ;;
mips-* | mips64-*) host_arch="Mips" ;;
@ -5120,7 +5122,7 @@ else
esac
fi
TARGETS_WITH_JIT="AArch64 ARM Mips PowerPC SystemZ X86"
TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
TARGETS_WITH_JIT=$TARGETS_WITH_JIT
@ -5357,7 +5359,7 @@ _ACEOF
fi
ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
ALL_TARGETS=$ALL_TARGETS
@ -5381,6 +5383,7 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;


@ -95,6 +95,16 @@ struct LLVMOpInfo1 {
#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */
#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */
/**
* The ARM64 target VariantKinds.
*/
#define LLVMDisassembler_VariantKind_ARM64_PAGE 1 /* @page */
#define LLVMDisassembler_VariantKind_ARM64_PAGEOFF 2 /* @pageoff */
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGE 3 /* @gotpage */
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF 4 /* @gotpageoff */
#define LLVMDisassembler_VariantKind_ARM64_TLVP 5 /* @tlvppage */
#define LLVMDisassembler_VariantKind_ARM64_TLVOFF 6 /* @tlvppageoff */
/**
* The type for the symbol lookup function. This may be called by the
* disassembler for things like adding a comment for a PC plus a constant
@ -123,6 +133,17 @@ typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
/* The input reference is from a PC relative load instruction. */
#define LLVMDisassembler_ReferenceType_In_PCrel_Load 2
/* The input reference is from an ARM64::ADRP instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_ADRP 0x100000001
/* The input reference is from an ARM64::ADDXri instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_ADDXri 0x100000002
/* The input reference is from an ARM64::LDRXui instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXui 0x100000003
/* The input reference is from an ARM64::LDRXl instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXl 0x100000004
/* The input reference is from an ARM64::ADR instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_ADR 0x100000005
/* The output reference is to a symbol stub. */
#define LLVMDisassembler_ReferenceType_Out_SymbolStub 1
/* The output reference is to a symbol address in a literal pool. */
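
The constants above are what a symbolizer callback hands back to the disassembler so it can annotate ADRP/ADD/LDR sequences with Mach-O style operand modifiers (@page, @pageoff, and so on). As a minimal sketch (not part of the patch; the helper name is made up), a client could map the new input-reference constants to printable names like this:

// Sketch only: translate the ARM64 reference-type constants defined above
// into short names for logging. Assumes <llvm-c/Disassembler.h> is available.
#include <llvm-c/Disassembler.h>
#include <stdint.h>

static const char *describeARM64Reference(uint64_t ReferenceType) {
  switch (ReferenceType) {
  case LLVMDisassembler_ReferenceType_In_ARM64_ADRP:   return "ADRP";
  case LLVMDisassembler_ReferenceType_In_ARM64_ADDXri: return "ADDXri";
  case LLVMDisassembler_ReferenceType_In_ARM64_LDRXui: return "LDRXui";
  case LLVMDisassembler_ReferenceType_In_ARM64_LDRXl:  return "LDRXl";
  case LLVMDisassembler_ReferenceType_In_ARM64_ADR:    return "ADR";
  default:                                             return "unknown";
  }
}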


@ -48,6 +48,7 @@ public:
arm, // ARM (little endian): arm, armv.*, xscale
armeb, // ARM (big endian): armeb
arm64, // ARM: arm64
aarch64, // AArch64 (little endian): aarch64
aarch64_be, // AArch64 (big endian): aarch64_be
hexagon, // Hexagon: hexagon


@ -529,6 +529,7 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
include "llvm/IR/IntrinsicsPowerPC.td"
include "llvm/IR/IntrinsicsX86.td"
include "llvm/IR/IntrinsicsARM.td"
include "llvm/IR/IntrinsicsARM64.td"
include "llvm/IR/IntrinsicsAArch64.td"
include "llvm/IR/IntrinsicsXCore.td"
include "llvm/IR/IntrinsicsHexagon.td"


@ -0,0 +1,621 @@
//===- IntrinsicsARM64.td - Defines ARM64 intrinsics -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the ARM64-specific intrinsics.
//
//===----------------------------------------------------------------------===//
let TargetPrefix = "arm64" in {
def int_arm64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
def int_arm64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
def int_arm64_clrex : Intrinsic<[]>;
def int_arm64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
def int_arm64_stxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty,
llvm_ptr_ty]>;
def int_arm64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_arm64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON)
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
class AdvSIMD_2Scalar_Float_Intrinsic
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_FPToIntRounding_Intrinsic
: Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
class AdvSIMD_1IntArg_Intrinsic
: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1FloatArg_Intrinsic
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Expand_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Long_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
class AdvSIMD_1IntArg_Narrow_Intrinsic
: Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Narrow_Intrinsic
: Intrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Int_Across_Intrinsic
: Intrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Float_Across_Intrinsic
: Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_2IntArg_Intrinsic
: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2FloatArg_Intrinsic
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Compare_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_2Arg_FloatCompare_Intrinsic
: Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Long_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>,
LLVMTruncatedType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Wide_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMTruncatedType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Narrow_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMExtendedType<0>, LLVMExtendedType<0>],
[IntrNoMem]>;
class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
: Intrinsic<[llvm_anyint_ty],
[LLVMExtendedType<0>, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
[IntrNoMem]>;
class AdvSIMD_3VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_3VectorArg_Scalar_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty,
LLVMMatchType<1>], [IntrNoMem]>;
class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_CvtFxToFP_Intrinsic
: Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
[IntrNoMem]>;
class AdvSIMD_CvtFPToFx_Intrinsic
: Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// Arithmetic ops
let Properties = [IntrNoMem] in {
// Vector Add Across Lanes
def int_arm64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_arm64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_arm64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
// Vector Long Add Across Lanes
def int_arm64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_arm64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
// Vector Halving Add
def int_arm64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic;
// Vector Rounding Halving Add
def int_arm64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic;
// Vector Saturating Add
def int_arm64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic;
// Vector Add High-Half
// FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
// header is no longer supported.
def int_arm64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
// Vector Rounding Add High-Half
def int_arm64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
// Vector Saturating Doubling Multiply High
def int_arm64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic;
// Vector Saturating Rounding Doubling Multiply High
def int_arm64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic;
// Vector Polynomial Multiply
def int_arm64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
// Vector Long Multiply
def int_arm64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
// Vector Extending Multiply
def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic;
// Vector Saturating Doubling Long Multiply
def int_arm64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic;
def int_arm64_neon_sqdmulls_scalar
: Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Vector Halving Subtract
def int_arm64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic;
// Vector Saturating Subtract
def int_arm64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic;
// Vector Subtract High-Half
// FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
// header is no longer supported.
def int_arm64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
// Vector Rounding Subtract High-Half
def int_arm64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
// Vector Compare Absolute Greater-than-or-equal
def int_arm64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic;
// Vector Compare Absolute Greater-than
def int_arm64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic;
// Vector Absolute Difference
def int_arm64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic;
// Scalar Absolute Difference
def int_arm64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic;
// Vector Max
def int_arm64_neon_smax : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_umax : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic;
// Vector Max Across Lanes
def int_arm64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_arm64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_arm64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
def int_arm64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
// Vector Min
def int_arm64_neon_smin : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_umin : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic;
// Vector Min/Max Number
def int_arm64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic;
def int_arm64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic;
// Vector Min Across Lanes
def int_arm64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_arm64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_arm64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
def int_arm64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
// Pairwise Add
def int_arm64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
// Long Pairwise Add
// FIXME: In theory, we shouldn't need intrinsics for saddlp or
// uaddlp, but tblgen's type inference currently can't handle the
// pattern fragments this ends up generating.
def int_arm64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
def int_arm64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
// Folding Maximum
def int_arm64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic;
// Folding Minimum
def int_arm64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic;
def int_arm64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic;
// Reciprocal Estimate/Step
def int_arm64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic;
def int_arm64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic;
// Vector Saturating Shift Left
def int_arm64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic;
// Vector Rounding Shift Left
def int_arm64_neon_srshl : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_urshl : AdvSIMD_2IntArg_Intrinsic;
// Vector Saturating Rounding Shift Left
def int_arm64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic;
// Vector Signed->Unsigned Shift Left by Constant
def int_arm64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic;
// Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant
def int_arm64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
// Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const
def int_arm64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
// Vector Narrowing Shift Right by Constant
def int_arm64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
def int_arm64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
// Vector Rounding Narrowing Shift Right by Constant
def int_arm64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
// Vector Rounding Narrowing Saturating Shift Right by Constant
def int_arm64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
def int_arm64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
// Vector Shift Left
def int_arm64_neon_sshl : AdvSIMD_2IntArg_Intrinsic;
def int_arm64_neon_ushl : AdvSIMD_2IntArg_Intrinsic;
// Vector Widening Shift Left by Constant
def int_arm64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic;
def int_arm64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
def int_arm64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
// Vector Shift Right by Constant and Insert
def int_arm64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic;
// Vector Shift Left by Constant and Insert
def int_arm64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic;
// Vector Saturating Narrow
def int_arm64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic;
def int_arm64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic;
def int_arm64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
def int_arm64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
// Vector Saturating Extract and Unsigned Narrow
def int_arm64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic;
def int_arm64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic;
// Vector Absolute Value
def int_arm64_neon_abs : AdvSIMD_1VectorArg_Intrinsic;
// Vector Saturating Absolute Value
def int_arm64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;
// Vector Saturating Negation
def int_arm64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic;
// Vector Count Leading Sign Bits
def int_arm64_neon_cls : AdvSIMD_1VectorArg_Intrinsic;
// Vector Reciprocal Estimate
def int_arm64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic;
def int_arm64_neon_frecpe : AdvSIMD_1VectorArg_Intrinsic;
// Vector Square Root Estimate
def int_arm64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
def int_arm64_neon_frsqrte : AdvSIMD_1VectorArg_Intrinsic;
// Vector Bitwise Reverse
def int_arm64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic;
// Vector Conversions Between Half-Precision and Single-Precision.
def int_arm64_neon_vcvtfp2hf
: Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_arm64_neon_vcvthf2fp
: Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
// Vector Conversions Between Floating-point and Fixed-point.
def int_arm64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
def int_arm64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;
def int_arm64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic;
def int_arm64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic;
// Vector FP->Int Conversions
def int_arm64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
def int_arm64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
// Vector FP Rounding: only ties to even is unrepresented by a normal
// intrinsic.
def int_arm64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
// Scalar FP->Int conversions
// Vector FP Inexact Narrowing
def int_arm64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic;
// Scalar FP Inexact Narrowing
def int_arm64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty],
[IntrNoMem]>;
}
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
class AdvSIMD_2Vector2Index_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty],
[IntrNoMem]>;
}
// Vector element to element moves
def int_arm64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
class AdvSIMD_1Vec_Load_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadArgMem]>;
class AdvSIMD_1Vec_Store_Lane_Intrinsic
: Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
[IntrReadWriteArgMem, NoCapture<2>]>;
class AdvSIMD_2Vec_Load_Intrinsic
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadArgMem]>;
class AdvSIMD_2Vec_Load_Lane_Intrinsic
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadArgMem]>;
class AdvSIMD_2Vec_Store_Intrinsic
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadWriteArgMem, NoCapture<2>]>;
class AdvSIMD_2Vec_Store_Lane_Intrinsic
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadWriteArgMem, NoCapture<3>]>;
class AdvSIMD_3Vec_Load_Intrinsic
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadArgMem]>;
class AdvSIMD_3Vec_Load_Lane_Intrinsic
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadArgMem]>;
class AdvSIMD_3Vec_Store_Intrinsic
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadWriteArgMem, NoCapture<3>]>;
class AdvSIMD_3Vec_Store_Lane_Intrinsic
: Intrinsic<[], [llvm_anyvector_ty,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadWriteArgMem, NoCapture<4>]>;
class AdvSIMD_4Vec_Load_Intrinsic
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadArgMem]>;
class AdvSIMD_4Vec_Load_Lane_Intrinsic
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadArgMem]>;
class AdvSIMD_4Vec_Store_Intrinsic
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadWriteArgMem, NoCapture<4>]>;
class AdvSIMD_4Vec_Store_Lane_Intrinsic
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadWriteArgMem, NoCapture<5>]>;
}
// Memory ops
def int_arm64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic;
def int_arm64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic;
def int_arm64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic;
def int_arm64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic;
def int_arm64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic;
def int_arm64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic;
def int_arm64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic;
def int_arm64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic;
def int_arm64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic;
def int_arm64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic;
def int_arm64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic;
def int_arm64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic;
def int_arm64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic;
def int_arm64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic;
def int_arm64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic;
def int_arm64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic;
def int_arm64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic;
def int_arm64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic;
def int_arm64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic;
def int_arm64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic;
def int_arm64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic;
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
class AdvSIMD_Tbl1_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbl2_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_Tbl3_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbl4_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx1_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx2_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx3_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
class AdvSIMD_Tbx4_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
[IntrNoMem]>;
}
def int_arm64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic;
def int_arm64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic;
def int_arm64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic;
def int_arm64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic;
def int_arm64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic;
def int_arm64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
def int_arm64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
def int_arm64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
let TargetPrefix = "arm64" in {
class Crypto_AES_DataKey_Intrinsic
: Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
class Crypto_AES_Data_Intrinsic
: Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
// SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
// (v4i32).
class Crypto_SHA_5Hash4Schedule_Intrinsic
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// SHA intrinsic taking 1 word of the hash (i32).
class Crypto_SHA_1Hash_Intrinsic
: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
// SHA intrinsic taking 8 words of the schedule
class Crypto_SHA_8Schedule_Intrinsic
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
// SHA intrinsic taking 12 words of the schedule
class Crypto_SHA_12Schedule_Intrinsic
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
// SHA intrinsic taking 8 words of the hash and 4 of the schedule.
class Crypto_SHA_8Hash4Schedule_Intrinsic
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
}
// AES
def int_arm64_crypto_aese : Crypto_AES_DataKey_Intrinsic;
def int_arm64_crypto_aesd : Crypto_AES_DataKey_Intrinsic;
def int_arm64_crypto_aesmc : Crypto_AES_Data_Intrinsic;
def int_arm64_crypto_aesimc : Crypto_AES_Data_Intrinsic;
// SHA1
def int_arm64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic;
def int_arm64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic;
def int_arm64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic;
def int_arm64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic;
def int_arm64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic;
def int_arm64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic;
// SHA256
def int_arm64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic;
def int_arm64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
def int_arm64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
def int_arm64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
//===----------------------------------------------------------------------===//
// CRC32
let TargetPrefix = "arm64" in {
def int_arm64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_arm64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_arm64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_arm64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_arm64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_arm64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_arm64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_arm64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
}
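
Everything in this file becomes an intrinsic under the "llvm.arm64." prefix once TableGen processes it. As a rough, hedged sketch of how a front end might emit one of the CRC32 intrinsics above (not part of the patch; it assumes the generated llvm::Intrinsic::arm64_crc32b enumerator and a pre-existing IRBuilder):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Sketch: fold one byte into a CRC32 accumulator via llvm.arm64.crc32b.
// Crc and Byte are i32 values already available at the insertion point.
static llvm::Value *emitCrc32b(llvm::IRBuilder<> &Builder, llvm::Module *M,
                               llvm::Value *Crc, llvm::Value *Byte) {
  llvm::Function *Crc32b =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::arm64_crc32b);
  return Builder.CreateCall(Crc32b, {Crc, Byte});
}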


@ -158,7 +158,13 @@ public:
VK_TLSLDM,
VK_TPOFF,
VK_DTPOFF,
VK_TLVP, // Mach-O thread local variable relocation
VK_TLVP, // Mach-O thread local variable relocations
VK_TLVPPAGE,
VK_TLVPPAGEOFF,
VK_PAGE,
VK_PAGEOFF,
VK_GOTPAGE,
VK_GOTPAGEOFF,
VK_SECREL,
VK_WEAKREF, // The link between the symbols in .weakref foo, bar


@ -408,6 +408,34 @@ namespace llvm {
ARM_RELOC_HALF = 8,
ARM_RELOC_HALF_SECTDIFF = 9,
// Constant values for the r_type field in an ARM64 architecture
// llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
// structure.
// For pointers.
ARM64_RELOC_UNSIGNED = 0,
// Must be followed by an ARM64_RELOC_UNSIGNED
ARM64_RELOC_SUBTRACTOR = 1,
// A B/BL instruction with 26-bit displacement.
ARM64_RELOC_BRANCH26 = 2,
// PC-rel distance to page of target.
ARM64_RELOC_PAGE21 = 3,
// Offset within page, scaled by r_length.
ARM64_RELOC_PAGEOFF12 = 4,
// PC-rel distance to page of GOT slot.
ARM64_RELOC_GOT_LOAD_PAGE21 = 5,
// Offset within page of GOT slot, scaled by r_length.
ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6,
// For pointers to GOT slots.
ARM64_RELOC_POINTER_TO_GOT = 7,
// PC-rel distance to page of TLVP slot.
ARM64_RELOC_TLVP_LOAD_PAGE21 = 8,
// Offset within page of TLVP slot, scaled by r_length.
ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9,
// Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12.
ARM64_RELOC_ADDEND = 10,
// Constant values for the r_type field in an x86_64 architecture
// llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
// structure
@ -914,6 +942,7 @@ namespace llvm {
/* CPU_TYPE_MIPS = 8, */
CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC
CPU_TYPE_ARM = 12,
CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64,
CPU_TYPE_SPARC = 14,
CPU_TYPE_POWERPC = 18,
CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
@ -987,6 +1016,10 @@ namespace llvm {
CPU_SUBTYPE_ARM_V7EM = 16
};
enum CPUSubTypeARM64 {
CPU_SUBTYPE_ARM64_ALL = 0
};
enum CPUSubTypeSPARC {
CPU_SUBTYPE_SPARC_ALL = 0
};
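
The PAGE21/PAGEOFF12 pair above mirrors how ARM64 code materializes an address on Darwin: ADRP produces the 4 KiB page of the target as a 21-bit PC-relative page delta, and a following ADD or load supplies the offset within that page. A small sketch of the arithmetic a linker or JIT performs for such a pair (illustration only, not part of the patch):

#include <cstdint>

// Split a target address into the two quantities ARM64_RELOC_PAGE21 and
// ARM64_RELOC_PAGEOFF12 describe: the PC-relative page delta consumed by
// ADRP, and the low 12 bits consumed by the instruction that follows it.
static void splitPageAddress(uint64_t Target, uint64_t FixupPC,
                             int64_t &PageDelta, uint64_t &PageOff) {
  const uint64_t PageMask = ~uint64_t(0xfff);   // 4 KiB pages
  PageDelta = int64_t(Target & PageMask) - int64_t(FixupPC & PageMask);
  PageOff = Target & 0xfff;                     // offset within the page
}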


@ -362,7 +362,6 @@ def bitconvert : SDNode<"ISD::BITCAST" , SDTUnaryOp>;
def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;
def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
@ -466,7 +465,7 @@ def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
def concat_vectors : SDNode<"ISD::CONCAT_VECTORS",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>,[]>;
SDTypeProfile<1, 2, [SDTCisSubVecOfVec<1, 0>, SDTCisSameAs<1, 2>]>,[]>;
// This operator does not do subvector type checking. The ARM
// backend, at least, needs it.


@ -167,6 +167,10 @@ void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section,
resolveARMRelocation(LocalAddress, FinalAddress, (uintptr_t)Value, isPCRel,
MachoType, Size, Addend);
break;
case Triple::arm64:
resolveARM64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value,
isPCRel, MachoType, Size, Addend);
break;
}
}
@ -293,6 +297,55 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
return false;
}
bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress,
uint64_t FinalAddress,
uint64_t Value, bool isPCRel,
unsigned Type, unsigned Size,
int64_t Addend) {
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (isPCRel)
Value -= FinalAddress;
switch (Type) {
default:
llvm_unreachable("Invalid relocation type!");
case MachO::ARM64_RELOC_UNSIGNED: {
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
uint8_t *p = (uint8_t *)LocalAddress;
for (unsigned i = 0; i < Size; ++i) {
*p++ = (uint8_t)Value;
Value >>= 8;
}
break;
}
case MachO::ARM64_RELOC_BRANCH26: {
// Mask the value into the target address. We know instructions are
// 32-bit aligned, so we can do it all at once.
uint32_t *p = (uint32_t *)LocalAddress;
// The low two bits of the value are not encoded.
Value >>= 2;
// Mask the value to 26 bits.
Value &= 0x3ffffff;
// Insert the value into the instruction.
*p = (*p & ~0x3ffffff) | Value;
break;
}
case MachO::ARM64_RELOC_SUBTRACTOR:
case MachO::ARM64_RELOC_PAGE21:
case MachO::ARM64_RELOC_PAGEOFF12:
case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
case MachO::ARM64_RELOC_POINTER_TO_GOT:
case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
case MachO::ARM64_RELOC_ADDEND:
return Error("Relocation type not implemented yet!");
}
return false;
}
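
A concrete worked example for the BRANCH26 case above (values are illustrative, not taken from the patch): for a BL at FinalAddress 0x4000 whose target Value is 0x4100, the immediate is derived as follows.

// Illustration only: encode a 26-bit branch immediate the same way the
// ARM64_RELOC_BRANCH26 case above does.
uint64_t Value = 0x4100 - 0x4000;   // PC-relative delta = 0x100
Value >>= 2;                        // imm26 counts 32-bit words -> 0x40
Value &= 0x3ffffff;                 // mask to 26 bits (still 0x40)
uint32_t Insn = 0x94000000;         // BL with a zero immediate
Insn = (Insn & ~0x3ffffffu) | uint32_t(Value);   // -> 0x94000040
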
relocation_iterator RuntimeDyldMachO::processRelocationRef(
unsigned SectionID, relocation_iterator RelI, ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols,


@ -34,6 +34,9 @@ class RuntimeDyldMachO : public RuntimeDyldImpl {
bool resolveARMRelocation(uint8_t *LocalAddress, uint64_t FinalAddress,
uint64_t Value, bool isPCRel, unsigned Type,
unsigned Size, int64_t Addend);
bool resolveARM64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress,
uint64_t Value, bool IsPCRel, unsigned Type,
unsigned Size, int64_t Addend);
void resolveRelocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend,


@ -321,6 +321,8 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
else if (Triple.getArch() == llvm::Triple::arm64)
MCpu = "cyclone";
}
TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options,


@ -168,6 +168,8 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
else if (Triple.getArch() == llvm::Triple::arm64)
CPU = "cyclone";
}
TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,


@ -179,6 +179,12 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_TPOFF: return "TPOFF";
case VK_DTPOFF: return "DTPOFF";
case VK_TLVP: return "TLVP";
case VK_TLVPPAGE: return "TLVPPAGE";
case VK_TLVPPAGEOFF: return "TLVPPAGEOFF";
case VK_PAGE: return "PAGE";
case VK_PAGEOFF: return "PAGEOFF";
case VK_GOTPAGE: return "GOTPAGE";
case VK_GOTPAGEOFF: return "GOTPAGEOFF";
case VK_SECREL: return "SECREL32";
case VK_WEAKREF: return "WEAKREF";
case VK_ARM_NONE: return "none";
@ -300,6 +306,18 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("dtpoff", VK_DTPOFF)
.Case("TLVP", VK_TLVP)
.Case("tlvp", VK_TLVP)
.Case("TLVPPAGE", VK_TLVPPAGE)
.Case("tlvppage", VK_TLVPPAGE)
.Case("TLVPPAGEOFF", VK_TLVPPAGEOFF)
.Case("tlvppageoff", VK_TLVPPAGEOFF)
.Case("PAGE", VK_PAGE)
.Case("page", VK_PAGE)
.Case("PAGEOFF", VK_PAGEOFF)
.Case("pageoff", VK_PAGEOFF)
.Case("GOTPAGE", VK_GOTPAGE)
.Case("gotpage", VK_GOTPAGE)
.Case("GOTPAGEOFF", VK_GOTPAGEOFF)
.Case("gotpageoff", VK_GOTPAGEOFF)
.Case("IMGREL", VK_COFF_IMGREL32)
.Case("imgrel", VK_COFF_IMGREL32)
.Case("SECREL32", VK_SECREL)


@ -22,6 +22,9 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
IsFunctionEHFrameSymbolPrivate = false;
SupportsWeakOmittedEHFrame = false;
if (T.isOSDarwin() && T.getArch() == Triple::arm64)
SupportsCompactUnwindWithoutEHFrame = true;
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
| dwarf::DW_EH_PE_sdata4;
LSDAEncoding = FDEEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
@ -146,7 +149,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
COFFDebugSymbolsSection = 0;
if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) {
if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) ||
(T.isOSDarwin() && T.getArch() == Triple::arm64)) {
CompactUnwindSection =
Ctx->getMachOSection("__LD", "__compact_unwind",
MachO::S_ATTR_DEBUG,
@ -154,6 +158,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
CompactUnwindDwarfEHFrameOnly = 0x04000000;
else if (T.getArch() == Triple::arm64)
CompactUnwindDwarfEHFrameOnly = 0x03000000;
}
// Debug Information.
@ -763,6 +769,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
// cellspu-apple-darwin. Perhaps we should fix in Triple?
if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
Arch == Triple::arm || Arch == Triple::thumb ||
Arch == Triple::arm64 ||
Arch == Triple::ppc || Arch == Triple::ppc64 ||
Arch == Triple::UnknownArch) &&
(T.isOSDarwin() || T.isOSBinFormatMachO())) {


@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/MachO.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Format.h"
@ -934,6 +935,23 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
res = Table[RType];
break;
}
case Triple::arm64:
case Triple::aarch64: {
static const char *const Table[] = {
"ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR",
"ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21",
"ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGE21",
"ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_POINTER_TO_GOT",
"ARM64_RELOC_TLVP_LOAD_PAGE21", "ARM64_RELOC_TLVP_LOAD_PAGEOFF12",
"ARM64_RELOC_ADDEND"
};
if (RType >= array_lengthof(Table))
res = "Unknown";
else
res = Table[RType];
break;
}
case Triple::ppc: {
static const char *const Table[] = {
"PPC_RELOC_VANILLA",
@ -1256,6 +1274,8 @@ StringRef MachOObjectFile::getFileFormatName() const {
switch (CPUType) {
case llvm::MachO::CPU_TYPE_X86_64:
return "Mach-O 64-bit x86-64";
case llvm::MachO::CPU_TYPE_ARM64:
return "Mach-O arm64";
case llvm::MachO::CPU_TYPE_POWERPC64:
return "Mach-O 64-bit ppc64";
default:
@ -1271,6 +1291,8 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
return Triple::x86_64;
case llvm::MachO::CPU_TYPE_ARM:
return Triple::arm;
case llvm::MachO::CPU_TYPE_ARM64:
return Triple::arm64;
case llvm::MachO::CPU_TYPE_POWERPC:
return Triple::ppc;
case llvm::MachO::CPU_TYPE_POWERPC64:


@ -23,6 +23,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case aarch64_be: return "aarch64_be";
case arm: return "arm";
case armeb: return "armeb";
case arm64: return "arm64";
case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
@ -66,6 +67,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case thumb:
case thumbeb: return "arm";
case arm64: return "arm64";
case ppc64:
case ppc64le:
case ppc: return "ppc";
@ -91,6 +94,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case nvptx: return "nvptx";
case nvptx64: return "nvptx";
case le32: return "le32";
case amdil: return "amdil";
case spir: return "spir";
@ -173,6 +177,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("aarch64_be", aarch64_be)
.Case("arm", arm)
.Case("armeb", armeb)
.Case("arm64", arm64)
.Case("mips", mips)
.Case("mipsel", mipsel)
.Case("mips64", mips64)
@ -219,6 +224,7 @@ const char *Triple::getArchNameForAssembler() {
.Cases("armv6", "thumbv6", "armv6")
.Cases("armv7", "thumbv7", "armv7")
.Case("armeb", "armeb")
.Case("arm64", "arm64")
.Case("r600", "r600")
.Case("nvptx", "nvptx")
.Case("nvptx64", "nvptx64")
@ -250,6 +256,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.StartsWith("thumbv", Triple::thumb)
.Case("thumbeb", Triple::thumbeb)
.StartsWith("thumbebv", Triple::thumbeb)
.Case("arm64", Triple::arm64)
.Case("msp430", Triple::msp430)
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@ -681,9 +688,9 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
break;
case IOS:
getOSVersion(Major, Minor, Micro);
// Default to 5.0.
// Default to 5.0 (or 7.0 for arm64).
if (Major == 0)
Major = 5;
Major = (getArch() == arm64) ? 7 : 5;
break;
}
}
@ -771,6 +778,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::spir:
return 32;
case llvm::Triple::arm64:
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::mips64:
@ -838,6 +846,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
case Triple::spir64: T.setArch(Triple::spir); break;
case Triple::arm64: T.setArch(Triple::arm); break;
}
return T;
}
@ -847,7 +856,6 @@ Triple Triple::get64BitArchVariant() const {
switch (getArch()) {
case Triple::UnknownArch:
case Triple::amdil:
case Triple::arm:
case Triple::armeb:
case Triple::hexagon:
case Triple::le32:
@ -871,6 +879,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::sparcv9:
case Triple::systemz:
case Triple::x86_64:
case Triple::arm64:
// Already 64-bit.
break;
@ -881,6 +890,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::x86: T.setArch(Triple::x86_64); break;
case Triple::spir: T.setArch(Triple::spir64); break;
case Triple::arm: T.setArch(Triple::arm64); break;
}
return T;
}
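
Taken together, the Triple changes make "arm64" parse as a first-class 64-bit architecture whose 32-bit counterpart is "arm". A short usage sketch (illustration, not part of the patch):

#include "llvm/ADT/Triple.h"
#include <cassert>

// Sketch: exercise the arm64 handling added to Triple above.
static void checkArm64Triple() {
  llvm::Triple T("arm64-apple-ios");
  assert(T.getArch() == llvm::Triple::arm64);
  assert(T.isArch64Bit());   // 64-bit pointer width per getArchPointerBitWidth
  assert(T.get32BitArchVariant().getArch() == llvm::Triple::arm);
  assert(llvm::Triple("armv7-apple-ios").get64BitArchVariant().getArch() ==
         llvm::Triple::arm64);
}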


@ -205,7 +205,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
void* start = NearBlock ? (unsigned char*)NearBlock->base() +
NearBlock->size() : 0;
#if defined(__APPLE__) && defined(__arm__)
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC,
flags, fd, 0);
#else
@ -220,7 +220,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
return MemoryBlock();
}
#if defined(__APPLE__) && defined(__arm__)
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
(vm_size_t)(PageSize*NumPages), 0,
VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
@ -253,7 +253,7 @@ bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
}
bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
#if defined(__APPLE__) && defined(__arm__)
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
if (M.Address == 0 || M.Size == 0) return false;
Memory::InvalidateInstructionCache(M.Address, M.Size);
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
@ -265,7 +265,7 @@ bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
}
bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
#if defined(__APPLE__) && defined(__arm__)
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
if (M.Address == 0 || M.Size == 0) return false;
Memory::InvalidateInstructionCache(M.Address, M.Size);
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
@ -280,7 +280,7 @@ bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
}
bool Memory::setRangeWritable(const void *Addr, size_t Size) {
#if defined(__APPLE__) && defined(__arm__)
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
(vm_size_t)Size, 0,
VM_PROT_READ | VM_PROT_WRITE);
@ -291,7 +291,7 @@ bool Memory::setRangeWritable(const void *Addr, size_t Size) {
}
bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
#if defined(__APPLE__) && defined(__arm__)
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
(vm_size_t)Size, 0,
VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
@ -311,7 +311,8 @@ void Memory::InvalidateInstructionCache(const void *Addr,
#if defined(__APPLE__)
# if (defined(__POWERPC__) || defined (__ppc__) || \
defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
defined(_POWER) || defined(_ARCH_PPC) || defined(__arm__) || \
defined(__arm64__))
sys_icache_invalidate(const_cast<void *>(Addr), Len);
# endif

lib/Target/ARM64/ARM64.h (new file)

@ -0,0 +1,48 @@
//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the LLVM
// ARM64 back-end.
//
//===----------------------------------------------------------------------===//
#ifndef TARGET_ARM64_H
#define TARGET_ARM64_H
#include "MCTargetDesc/ARM64BaseInfo.h"
#include "MCTargetDesc/ARM64MCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
class ARM64TargetMachine;
class FunctionPass;
class MachineFunctionPass;
FunctionPass *createARM64DeadRegisterDefinitions();
FunctionPass *createARM64ConditionalCompares();
FunctionPass *createARM64AdvSIMDScalar();
FunctionPass *createARM64BranchRelaxation();
FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createARM64StorePairSuppressPass();
FunctionPass *createARM64ExpandPseudoPass();
FunctionPass *createARM64LoadStoreOptimizationPass();
ModulePass *createARM64PromoteConstantPass();
FunctionPass *createARM64AddressTypePromotionPass();
/// \brief Creates an ARM64-specific Target Transformation Info pass.
ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM);
FunctionPass *createARM64CleanupLocalDynamicTLSPass();
FunctionPass *createARM64CollectLOHPass();
} // end namespace llvm
#endif
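
The factory functions above return ordinary ModulePass/FunctionPass objects, so the IR-level ones can also be driven through a legacy pass manager in isolation. A hypothetical sketch (not part of the patch; it assumes ARM64.h is on the include path and only exercises the IR passes, since the MachineFunction passes need the full codegen pipeline):

#include "ARM64.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

// Sketch: run the IR-level ARM64 passes declared above over a module.
static void runARM64IRPasses(llvm::Module &M) {
  llvm::legacy::FunctionPassManager FPM(&M);
  FPM.add(llvm::createARM64AddressTypePromotionPass());  // FunctionPass on IR
  FPM.doInitialization();
  for (llvm::Function &F : M)
    FPM.run(F);
  FPM.doFinalization();

  llvm::legacy::PassManager MPM;
  MPM.add(llvm::createARM64PromoteConstantPass());       // ModulePass on IR
  MPM.run(M);
}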

lib/Target/ARM64/ARM64.td (new file)

@ -0,0 +1,95 @@
//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Target-independent interfaces which we are implementing
//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// ARM64 Subtarget features.
//
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
/// Cyclone has instructions which zero registers for "free".
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
include "ARM64RegisterInfo.td"
include "ARM64CallingConvention.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "ARM64Schedule.td"
include "ARM64InstrInfo.td"
def ARM64InstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
// ARM64 Processors supported.
//
include "ARM64SchedCyclone.td"
def : ProcessorModel<"arm64-generic", NoSchedModel, []>;
def : ProcessorModel<"cyclone", CycloneModel, [FeatureZCRegMove, FeatureZCZeroing]>;
//===----------------------------------------------------------------------===//
// Assembly parser
//===----------------------------------------------------------------------===//
def GenericAsmParserVariant : AsmParserVariant {
int Variant = 0;
string Name = "generic";
}
def AppleAsmParserVariant : AsmParserVariant {
int Variant = 1;
string Name = "apple-neon";
}
//===----------------------------------------------------------------------===//
// Assembly printer
//===----------------------------------------------------------------------===//
// ARM64 uses the MC printer for asm output, so make sure the TableGen
// AsmWriter bits get associated with the correct class.
def GenericAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
int Variant = 0;
bit isMCAsmWriter = 1;
}
def AppleAsmWriter : AsmWriter {
let AsmWriterClassName = "AppleInstPrinter";
int Variant = 1;
int isMCAsmWriter = 1;
}
//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def ARM64 : Target {
let InstructionSet = ARM64InstrInfo;
let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant];
let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
}


@ -0,0 +1,505 @@
//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass tries to promote the computations used to obtain a sign-extended
// value used in memory accesses.
// E.g.
// a = add nsw i32 b, 3
// d = sext i32 a to i64
// e = getelementptr ..., i64 d
//
// =>
// f = sext i32 b to i64
// a = add nsw i64 f, 3
// e = getelementptr ..., i64 a
//
// This is legal to do if the computations are marked with either the nsw or
// nuw marker.
// Moreover, the current heuristic is simple: it does not create new sext
// operations, i.e., it gives up when a sext would have forked (e.g., if
// a = add i32 b, c, two sexts are required to promote the computation).
//
// FIXME: This pass may be useful for other targets too.
// ===---------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-type-promotion"
#include "ARM64.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
static cl::opt<bool>
EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden,
cl::desc("Enable the type promotion pass"),
cl::init(true));
static cl::opt<bool>
EnableMerge("arm64-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
" the other."),
cl::init(true));
//===----------------------------------------------------------------------===//
// ARM64AddressTypePromotion
//===----------------------------------------------------------------------===//
namespace llvm {
void initializeARM64AddressTypePromotionPass(PassRegistry &);
}
namespace {
class ARM64AddressTypePromotion : public FunctionPass {
public:
static char ID;
ARM64AddressTypePromotion()
: FunctionPass(ID), Func(NULL), ConsideredSExtType(NULL) {
initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
}
virtual const char *getPassName() const {
return "ARM64 Address Type Promotion";
}
/// Iterate over the functions and promote the computation of interesting
// sext instructions.
bool runOnFunction(Function &F);
private:
/// The current function.
Function *Func;
/// Filter out all sexts that do not have this type.
/// Currently initialized with Int64Ty.
Type *ConsideredSExtType;
// This transformation requires dominator info.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
typedef SmallPtrSet<Instruction *, 32> SetOfInstructions;
typedef SmallVector<Instruction *, 16> Instructions;
typedef DenseMap<Value *, Instructions> ValueToInsts;
/// Check if it is profitable to move a sext through this instruction.
/// Currently, we consider it is profitable if:
/// - Inst is used only once (no need to insert truncate).
/// - Inst has only one operand that will require a sext operation (we do
/// not create new sext operations).
bool shouldGetThrough(const Instruction *Inst);
/// Check if it is possible and legal to move a sext through this
/// instruction.
/// Current heuristic considers that we can get through:
/// - Arithmetic operation marked with the nsw or nuw flag.
/// - Other sext operation.
/// - Truncate operation if it was just dropping sign extended bits.
bool canGetThrough(const Instruction *Inst);
/// Move sext operations through safe-to-sext instructions.
bool propagateSignExtension(Instructions &SExtInsts);
/// Check whether this sext should be considered for code motion.
/// We look for sexts with ConsideredSExtType and uses in at least one
/// GetElementPtrInst.
bool shouldConsiderSExt(const Instruction *SExt) const;
/// Collect all interesting sext operations, i.e., the ones with the right
/// type and used in memory accesses.
/// More precisely, a sext instruction is considered interesting if it
/// is used in a "complex" getelementptr or if there exists at least one other
/// sext instruction that sign extends the same initial value.
/// A getelementptr is considered "complex" if it has more than 2
/// operands.
void analyzeSExtension(Instructions &SExtInsts);
/// Merge redundant sign extension operations when one dominates the other.
void mergeSExts(ValueToInsts &ValToSExtendedUses,
SetOfInstructions &ToRemove);
};
} // end anonymous namespace.
char ARM64AddressTypePromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion",
"ARM64 Type Promotion Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion",
"ARM64 Type Promotion Pass", false, false)
FunctionPass *llvm::createARM64AddressTypePromotionPass() {
return new ARM64AddressTypePromotion();
}
bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
if (isa<SExtInst>(Inst))
return true;
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
(BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
return true;
// sext(trunc(sext)) --> sext
if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) {
const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0));
// Check that the truncate just drops sign-extended bits.
if (Inst->getType()->getIntegerBitWidth() >=
Opnd->getOperand(0)->getType()->getIntegerBitWidth() &&
Inst->getOperand(0)->getType()->getIntegerBitWidth() <=
ConsideredSExtType->getIntegerBitWidth())
return true;
}
return false;
}
bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
// If the type of the sext is the same as the considered one, this sext
// will become useless.
// Otherwise, we will have to do something to preserve the original value,
// unless it is used once.
if (isa<SExtInst>(Inst) &&
(Inst->getType() == ConsideredSExtType || Inst->hasOneUse()))
return true;
// If the Inst is used more than once, we may need to insert truncate
// operations and we don't do that at the moment.
if (!Inst->hasOneUse())
return false;
// This truncate is used only once, thus if we can get through, it will become
// useless.
if (isa<TruncInst>(Inst))
return true;
// If both operands are not constant, a new sext will be created here.
// Current heuristic is: each step should be profitable.
// Therefore we do not allow increasing the number of sexts even if it may
// be profitable later on.
if (isa<BinaryOperator>(Inst) && isa<ConstantInt>(Inst->getOperand(1)))
return true;
return false;
}
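// The condition operand of a select (operand 0) is an i1 that must keep its
// type, so it is never a candidate for sign extension; only the true/false
// values may be promoted.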
static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
if (isa<SelectInst>(Inst) && OpIdx == 0)
return false;
return true;
}
bool
ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
if (SExt->getType() != ConsideredSExtType)
return false;
for (Value::const_use_iterator UseIt = SExt->use_begin(),
EndUseIt = SExt->use_end();
UseIt != EndUseIt; ++UseIt) {
if (isa<GetElementPtrInst>(*UseIt))
return true;
}
return false;
}
// Input:
// - SExtInsts contains all the sext instructions that are used directly in
// GetElementPtrInst, i.e., access to memory.
// Algorithm:
// - For each sext operation in SExtInsts:
// Let var be the operand of sext.
// while it is profitable (see shouldGetThrough), legal, and safe
// (see canGetThrough) to move sext through var's definition:
// * promote the type of var's definition.
// * fold var into sext uses.
// * move sext above var's definition.
// * update sext operand to use the operand of var that should be sign
// extended (by construction there is only one).
//
// E.g.,
// a = ... i32 c, 3
// b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a'
// ...
// = b
// => Yes, update the code
// b = sext i32 c to i64
// a = ... i64 b, 3
// ...
// = a
// Iterate on 'c'.
bool
ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
DEBUG(dbgs() << "*** Propagate Sign Extension ***\n");
bool LocalChange = false;
SetOfInstructions ToRemove;
ValueToInsts ValToSExtendedUses;
while (!SExtInsts.empty()) {
// Get through simple chain.
Instruction *SExt = SExtInsts.pop_back_val();
DEBUG(dbgs() << "Consider:\n" << *SExt << '\n');
// If this SExt has already been merged, continue.
if (SExt->use_empty() && ToRemove.count(SExt)) {
DEBUG(dbgs() << "No uses => marked as delete\n");
continue;
}
// Now try to get through the chain of definitions.
while (isa<Instruction>(SExt->getOperand(0))) {
Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0));
DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
// We cannot get through something that is not an Instruction
// or not safe to SExt.
DEBUG(dbgs() << "Cannot get through\n");
break;
}
LocalChange = true;
// If this is a sign extend, it becomes useless.
if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) {
DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n");
// We cannot use replaceAllUsesWith here because we may trigger a type
// assertion, as not all of the involved sext operations may have been
// moved yet.
while (!Inst->use_empty()) {
Value::use_iterator UseIt = Inst->use_begin();
Instruction *UseInst = dyn_cast<Instruction>(*UseIt);
assert(UseInst && "Use of sext is not an Instruction!");
UseInst->setOperand(UseIt->getOperandNo(), SExt);
}
ToRemove.insert(Inst);
SExt->setOperand(0, Inst->getOperand(0));
SExt->moveBefore(Inst);
continue;
}
// Get through the Instruction:
// 1. Update its type.
// 2. Replace the uses of SExt by Inst.
// 3. Sign extend each operand that needs to be sign extended.
// Step #1.
Inst->mutateType(SExt->getType());
// Step #2.
SExt->replaceAllUsesWith(Inst);
// Step #3.
Instruction *SExtForOpnd = SExt;
DEBUG(dbgs() << "Propagate SExt to operands\n");
for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx;
++OpIdx) {
DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n');
if (Inst->getOperand(OpIdx)->getType() == SExt->getType() ||
!shouldSExtOperand(Inst, OpIdx)) {
DEBUG(dbgs() << "No need to propagate\n");
continue;
}
// Check if we can statically sign extend the operand.
Value *Opnd = Inst->getOperand(OpIdx);
if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
DEBUG(dbgs() << "Statically sign extend\n");
Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(),
Cst->getSExtValue()));
continue;
}
// UndefValues are typed, so we have to statically sign extend them.
if (isa<UndefValue>(Opnd)) {
DEBUG(dbgs() << "Statically sign extend\n");
Inst->setOperand(OpIdx, UndefValue::get(SExt->getType()));
continue;
}
// Otherwise we have to explicitly sign extend it.
assert(SExtForOpnd &&
"Only one operand should have been sign extended");
SExtForOpnd->setOperand(0, Opnd);
DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n");
// Move the sign extension before the insertion point.
SExtForOpnd->moveBefore(Inst);
Inst->setOperand(OpIdx, SExtForOpnd);
// If more sexts are required, new instructions will have to be created.
SExtForOpnd = NULL;
}
if (SExtForOpnd == SExt) {
DEBUG(dbgs() << "Sign extension is useless now\n");
ToRemove.insert(SExt);
break;
}
}
// If the sext's operand is already of the right type, connect the sext's uses
// to its argument and delete the sext.
// This can happen for an Instruction whose uses are all sign extended.
if (!ToRemove.count(SExt) &&
SExt->getType() == SExt->getOperand(0)->getType()) {
DEBUG(dbgs() << "Sign extension is useless, attach its use to "
"its argument\n");
SExt->replaceAllUsesWith(SExt->getOperand(0));
ToRemove.insert(SExt);
} else
ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt);
}
if (EnableMerge)
mergeSExts(ValToSExtendedUses, ToRemove);
// Remove all instructions marked as ToRemove.
for (SetOfInstructions::iterator ToRemoveIt = ToRemove.begin(),
EndToRemoveIt = ToRemove.end();
ToRemoveIt != EndToRemoveIt; ++ToRemoveIt)
(*ToRemoveIt)->eraseFromParent();
return LocalChange;
}
void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
SetOfInstructions &ToRemove) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
for (ValueToInsts::iterator It = ValToSExtendedUses.begin(),
EndIt = ValToSExtendedUses.end();
It != EndIt; ++It) {
Instructions &Insts = It->second;
Instructions CurPts;
for (Instructions::iterator IIt = Insts.begin(), EndIIt = Insts.end();
IIt != EndIIt; ++IIt) {
if (ToRemove.count(*IIt))
continue;
bool inserted = false;
for (Instructions::iterator CurPtsIt = CurPts.begin(),
EndCurPtsIt = CurPts.end();
CurPtsIt != EndCurPtsIt; ++CurPtsIt) {
if (DT.dominates(*IIt, *CurPtsIt)) {
DEBUG(dbgs() << "Replace all uses of:\n" << **CurPtsIt << "\nwith:\n"
<< **IIt << '\n');
(*CurPtsIt)->replaceAllUsesWith(*IIt);
ToRemove.insert(*CurPtsIt);
*CurPtsIt = *IIt;
inserted = true;
break;
}
if (!DT.dominates(*CurPtsIt, *IIt))
// Give up if we need to merge in a common dominator as the
// experiments show it is not profitable.
continue;
DEBUG(dbgs() << "Replace all uses of:\n" << **IIt << "\nwith:\n"
<< **CurPtsIt << '\n');
(*IIt)->replaceAllUsesWith(*CurPtsIt);
ToRemove.insert(*IIt);
inserted = true;
break;
}
if (!inserted)
CurPts.push_back(*IIt);
}
}
}
void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n");
DenseMap<Value *, Instruction *> SeenChains;
for (Function::iterator IBB = Func->begin(), IEndBB = Func->end();
IBB != IEndBB; ++IBB) {
for (BasicBlock::iterator II = IBB->begin(), IEndI = IBB->end();
II != IEndI; ++II) {
// Collect all sext operation per type.
if (!isa<SExtInst>(II) || !shouldConsiderSExt(II))
continue;
Instruction *SExt = II;
DEBUG(dbgs() << "Found:\n" << (*II) << '\n');
// Cases where we actually perform the optimization:
// 1. SExt is used in a getelementptr with more than 2 operands =>
//    likely we can merge some computation if it is done on 64 bits.
// 2. The beginning of the SExt chain is sign extended several times =>
//    code sharing is possible.
bool insert = false;
// #1.
for (Value::use_iterator UseIt = SExt->use_begin(),
EndUseIt = SExt->use_end();
UseIt != EndUseIt; ++UseIt) {
const Instruction *Inst = dyn_cast<GetElementPtrInst>(*UseIt);
if (Inst && Inst->getNumOperands() > 2) {
DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
<< '\n');
insert = true;
break;
}
}
// #2.
// Check the head of the chain.
Instruction *Inst = SExt;
Value *Last;
do {
int OpdIdx = 0;
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
if (BinOp && isa<ConstantInt>(BinOp->getOperand(0)))
OpdIdx = 1;
Last = Inst->getOperand(OpdIdx);
Inst = dyn_cast<Instruction>(Last);
} while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst));
DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n');
DenseMap<Value *, Instruction *>::iterator AlreadySeen =
SeenChains.find(Last);
if (insert || AlreadySeen != SeenChains.end()) {
DEBUG(dbgs() << "Insert\n");
SExtInsts.push_back(II);
if (AlreadySeen != SeenChains.end() && AlreadySeen->second != NULL) {
DEBUG(dbgs() << "Insert chain member\n");
SExtInsts.push_back(AlreadySeen->second);
SeenChains[Last] = NULL;
}
} else {
DEBUG(dbgs() << "Record its chain membership\n");
SeenChains[Last] = SExt;
}
}
}
}
bool ARM64AddressTypePromotion::runOnFunction(Function &F) {
if (!EnableAddressTypePromotion || F.isDeclaration())
return false;
Func = &F;
ConsideredSExtType = Type::getInt64Ty(Func->getContext());
DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n');
Instructions SExtInsts;
analyzeSExtension(SExtInsts);
return propagateSignExtension(SExtInsts);
}

View File

@ -0,0 +1,392 @@
//===-- ARM64AdvSIMDScalar.cpp - Use AdvSIMD scalar unit for i64 ops ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// When profitable, replace GPR targeting i64 instructions with their
// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined
// as minimizing the number of cross-class register copies.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TODO: Graph based predicate heuristics.
// Walking the instruction list linearly will get many, perhaps most, of
// the cases, but to do a truly thorough job of this, we need a more
// holistic approach.
//
// This optimization is very similar in spirit to the register allocator's
// spill placement, only here we're determining where to place cross-class
// register copies rather than spills. As such, a similar approach is
// called for.
//
// We want to build up a set of graphs of all instructions which are candidates
// for transformation along with instructions which generate their inputs and
// consume their outputs. For each edge in the graph, we assign a weight
// based on whether there is a copy required there (weight zero if not) and
// the block frequency of the block containing the defining or using
// instruction, whichever is less. Our optimization is then a graph problem
// to minimize the total weight of all the graphs, then transform instructions
// and add or remove copy instructions as called for to implement the
// solution.
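//
// As a simplified, hand-written illustration of the effect (register names
// are made up), a sequence such as
//   fmov x0, d0        ; cross-class copy FPR64 -> GPR64
//   fmov x1, d1
//   add  x2, x0, x1    ; integer add on the GPR side
//   fmov d2, x2        ; copy the result back to FPR64
// can instead stay on the SIMD side as
//   add  d2, d0, d1    ; AdvSIMD scalar add, no cross-class copies
// when the inputs are produced and the result is consumed in FPR64 registers.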
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-simd-scalar"
#include "ARM64.h"
#include "ARM64InstrInfo.h"
#include "ARM64RegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool>
AdvSIMDScalar("arm64-simd-scalar",
cl::desc("enable use of AdvSIMD scalar integer instructions"),
cl::init(false), cl::Hidden);
// Allow forcing all i64 operations with equivalent SIMD instructions to use
// them. For stress-testing the transformation function.
static cl::opt<bool>
TransformAll("arm64-simd-scalar-force-all",
cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
cl::init(false), cl::Hidden);
STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
namespace {
class ARM64AdvSIMDScalar : public MachineFunctionPass {
MachineRegisterInfo *MRI;
const ARM64InstrInfo *TII;
private:
// isProfitableToTransform - Predicate function to determine whether an
// instruction should be transformed to its equivalent AdvSIMD scalar
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
bool isProfitableToTransform(const MachineInstr *MI) const;
// transformInstruction - Perform the transformation of an instruction
// to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
// to be the correct register class, minimizing cross-class copies.
void transformInstruction(MachineInstr *MI);
// processMachineBasicBlock - Main optimization loop.
bool processMachineBasicBlock(MachineBasicBlock *MBB);
public:
static char ID; // Pass identification, replacement for typeid.
explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &F);
const char *getPassName() const {
return "AdvSIMD scalar operation optimization";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
char ARM64AdvSIMDScalar::ID = 0;
} // end anonymous namespace
static bool isGPR64(unsigned Reg, unsigned SubReg,
const MachineRegisterInfo *MRI) {
if (SubReg)
return false;
if (TargetRegisterInfo::isVirtualRegister(Reg))
return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass);
return ARM64::GPR64RegClass.contains(Reg);
}
static bool isFPR64(unsigned Reg, unsigned SubReg,
const MachineRegisterInfo *MRI) {
if (TargetRegisterInfo::isVirtualRegister(Reg))
return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) &&
SubReg == 0) ||
(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) &&
SubReg == ARM64::dsub);
// Physical register references just check the register class directly.
return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
(ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub);
}
// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
// copy instruction. Return zero_reg if the instruction is not a copy.
static unsigned getSrcFromCopy(const MachineInstr *MI,
const MachineRegisterInfo *MRI,
unsigned &SubReg) {
SubReg = 0;
// The "FMOV Xd, Dn" instruction is the typical form.
if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr)
return MI->getOperand(1).getReg();
// A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see
// these at this stage, but it's easy to check for.
if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) {
SubReg = ARM64::dsub;
return MI->getOperand(1).getReg();
}
// Or just a plain COPY instruction. This can be directly to/from FPR64,
// or it can be a dsub subreg reference to an FPR128.
if (MI->getOpcode() == ARM64::COPY) {
if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
MRI) &&
isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI))
return MI->getOperand(1).getReg();
if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
MRI) &&
isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(),
MRI)) {
SubReg = ARM64::dsub;
return MI->getOperand(1).getReg();
}
}
// Otherwise, this is some other kind of instruction.
return 0;
}
// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
// that we're considering transforming to, return that AdvSIMD opcode. For all
// others, return the original opcode.
static int getTransformOpcode(unsigned Opc) {
switch (Opc) {
default:
break;
// FIXME: Lots more possibilities.
case ARM64::ADDXrr:
return ARM64::ADDv1i64;
case ARM64::SUBXrr:
return ARM64::SUBv1i64;
}
// No AdvSIMD equivalent, so just return the original opcode.
return Opc;
}
static bool isTransformable(const MachineInstr *MI) {
int Opc = MI->getOpcode();
return Opc != getTransformOpcode(Opc);
}
// isProfitableToTransform - Predicate function to determine whether an
// instruction should be transformed to its equivalent AdvSIMD scalar
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
// If this instruction isn't eligible to be transformed (no SIMD equivalent),
// early exit since that's the common case.
if (!isTransformable(MI))
return false;
// Count the number of copies we'll need to add and approximate the number
// of copies that a transform will enable us to remove.
unsigned NumNewCopies = 3;
unsigned NumRemovableCopies = 0;
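// NumNewCopies starts at three because a transformed instruction needs at most
// one new cross-class copy per source operand plus one for the result; each
// source that is already defined by a copy, and a result whose uses are all
// copies, later removes the need for one of those.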
unsigned OrigSrc0 = MI->getOperand(1).getReg();
unsigned OrigSrc1 = MI->getOperand(2).getReg();
unsigned Src0 = 0, SubReg0;
unsigned Src1 = 0, SubReg1;
if (!MRI->def_empty(OrigSrc0)) {
MachineRegisterInfo::def_instr_iterator Def =
MRI->def_instr_begin(OrigSrc0);
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
// If the source was from a copy, we don't need to insert a new copy.
if (Src0)
--NumNewCopies;
// If there are no other users of the original source, we can delete
// that instruction.
if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0))
++NumRemovableCopies;
}
if (!MRI->def_empty(OrigSrc1)) {
MachineRegisterInfo::def_instr_iterator Def =
MRI->def_instr_begin(OrigSrc1);
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
if (Src1)
--NumNewCopies;
// If there are no other users of the original source, we can delete
// that instruction.
if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1))
++NumRemovableCopies;
}
// If any of the uses of the original instructions is a cross class copy,
// that's a copy that will be removable if we transform. Likewise, if
// any of the uses is a transformable instruction, it's likely the transforms
// will chain, enabling us to save a copy there, too. This is an aggressive
// heuristic that approximates the graph based cost analysis described above.
unsigned Dst = MI->getOperand(0).getReg();
bool AllUsesAreCopies = true;
for (MachineRegisterInfo::use_instr_nodbg_iterator
Use = MRI->use_instr_nodbg_begin(Dst),
E = MRI->use_instr_nodbg_end();
Use != E; ++Use) {
unsigned SubReg;
if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use))
++NumRemovableCopies;
// If the use is an INSERT_SUBREG, that's still something that can
// directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's
// preferable to have it use the FPR64 in most cases, as if the source
// vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely.
// Ditto for a lane insert.
else if (Use->getOpcode() == ARM64::INSERT_SUBREG ||
Use->getOpcode() == ARM64::INSvi64gpr)
;
else
AllUsesAreCopies = false;
}
// If all of the uses of the original destination register are copies to
// FPR64, then we won't end up having a new copy back to GPR64 either.
if (AllUsesAreCopies)
--NumNewCopies;
// If a transform will not increase the number of cross-class copies required,
// return true.
if (NumNewCopies <= NumRemovableCopies)
return true;
// Finally, even if we otherwise wouldn't transform, check if we're forcing
// transformation of everything.
return TransformAll;
}
static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI,
unsigned Dst, unsigned Src, bool IsKill) {
MachineInstrBuilder MIB =
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY),
Dst)
.addReg(Src, getKillRegState(IsKill));
DEBUG(dbgs() << " adding copy: " << *MIB);
++NumCopiesInserted;
return MIB;
}
// transformInstruction - Perform the transformation of an instruction
// to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
// to be the correct register class, minimizing cross-class copies.
void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
DEBUG(dbgs() << "Scalar transform: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
int OldOpc = MI->getOpcode();
int NewOpc = getTransformOpcode(OldOpc);
assert(OldOpc != NewOpc && "transform an instruction to itself?!");
// Check if we need a copy for the source registers.
unsigned OrigSrc0 = MI->getOperand(1).getReg();
unsigned OrigSrc1 = MI->getOperand(2).getReg();
unsigned Src0 = 0, SubReg0;
unsigned Src1 = 0, SubReg1;
if (!MRI->def_empty(OrigSrc0)) {
MachineRegisterInfo::def_instr_iterator Def =
MRI->def_instr_begin(OrigSrc0);
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
// If there are no other users of the original source, we can delete
// that instruction.
if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) {
assert(Src0 && "Can't delete copy w/o a valid original source!");
Def->eraseFromParent();
++NumCopiesDeleted;
}
}
if (!MRI->def_empty(OrigSrc1)) {
MachineRegisterInfo::def_instr_iterator Def =
MRI->def_instr_begin(OrigSrc1);
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
// If there are no other users of the original source, we can delete
// that instruction.
if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) {
assert(Src1 && "Can't delete copy w/o a valid original source!");
Def->eraseFromParent();
++NumCopiesDeleted;
}
}
// If we weren't able to reference the original source directly, create a
// copy.
if (!Src0) {
SubReg0 = 0;
Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
insertCopy(TII, MI, Src0, OrigSrc0, true);
}
if (!Src1) {
SubReg1 = 0;
Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
insertCopy(TII, MI, Src1, OrigSrc1, true);
}
// Create a vreg for the destination.
// FIXME: No need to do this if the ultimate user expects an FPR64.
// Check for that and avoid the copy if possible.
unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
// For now, all of the new instructions have the same simple three-register
// form, so no need to special case based on what instruction we're
// building.
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst)
.addReg(Src0, getKillRegState(true), SubReg0)
.addReg(Src1, getKillRegState(true), SubReg1);
// Now copy the result back out to a GPR.
// FIXME: Try to avoid this if all uses could actually just use the FPR64
// directly.
insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true);
// Erase the old instruction.
MI->eraseFromParent();
++NumScalarInsnsUsed;
}
// processMachineBasicBlock - Main optimization loop.
bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = I;
++I;
if (isProfitableToTransform(MI)) {
transformInstruction(MI);
Changed = true;
}
}
return Changed;
}
// runOnMachineFunction - Pass entry point from PassManager.
bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
// Early exit if pass disabled.
if (!AdvSIMDScalar)
return false;
bool Changed = false;
DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");
const TargetMachine &TM = mf.getTarget();
MRI = &mf.getRegInfo();
TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
if (processMachineBasicBlock(I))
Changed = true;
return Changed;
}
// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine
// to add the pass to the PassManager.
FunctionPass *llvm::createARM64AdvSIMDScalar() {
return new ARM64AdvSIMDScalar();
}

View File

@ -0,0 +1,573 @@
//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to the ARM64 assembly language.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
#include "ARM64.h"
#include "ARM64MachineFunctionInfo.h"
#include "ARM64MCInstLower.h"
#include "ARM64RegisterInfo.h"
#include "InstPrinter/ARM64InstPrinter.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
class ARM64AsmPrinter : public AsmPrinter {
ARM64MCInstLower MCInstLowering;
StackMaps SM;
public:
ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer), MCInstLowering(OutContext, *Mang, *this),
SM(*this), ARM64FI(NULL), LOHLabelCounter(0) {}
virtual const char *getPassName() const { return "ARM64 Assembly Printer"; }
/// \brief Wrapper for MCInstLowering.lowerOperand() for the
/// tblgen'erated pseudo lowering.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
return MCInstLowering.lowerOperand(MO, MCOp);
}
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
/// \brief tblgen'erated driver function for lowering simple MI->MC
/// pseudo instructions.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
void EmitInstruction(const MachineInstr *MI);
void getAnalysisUsage(AnalysisUsage &AU) const {
AsmPrinter::getAnalysisUsage(AU);
AU.setPreservesAll();
}
bool runOnMachineFunction(MachineFunction &F) {
ARM64FI = F.getInfo<ARM64FunctionInfo>();
return AsmPrinter::runOnMachineFunction(F);
}
private:
MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
bool printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC, bool isVector,
raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
void EmitFunctionBodyEnd();
MCSymbol *GetCPISymbol(unsigned CPID) const;
void EmitEndOfAsmFile(Module &M);
ARM64FunctionInfo *ARM64FI;
/// \brief Emit the LOHs contained in ARM64FI.
void EmitLOHs();
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
MInstToMCSymbol LOHInstToLabel;
unsigned LOHLabelCounter;
};
} // end of anonymous namespace
//===----------------------------------------------------------------------===//
void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) {
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
SM.serializeToStackMapSection();
}
MachineLocation
ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
MachineLocation Location;
assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
// Frame address. Currently handles register +- offset only.
if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
else {
DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
}
return Location;
}
void ARM64AsmPrinter::EmitLOHs() {
const ARM64FunctionInfo::MILOHDirectives &LOHs =
const_cast<const ARM64FunctionInfo *>(ARM64FI)
->getLOHContainer()
.getDirectives();
SmallVector<MCSymbol *, 3> MCArgs;
for (ARM64FunctionInfo::MILOHDirectives::const_iterator It = LOHs.begin(),
EndIt = LOHs.end();
It != EndIt; ++It) {
const ARM64FunctionInfo::MILOHArgs &MIArgs = It->getArgs();
for (ARM64FunctionInfo::MILOHArgs::const_iterator
MIArgsIt = MIArgs.begin(),
EndMIArgsIt = MIArgs.end();
MIArgsIt != EndMIArgsIt; ++MIArgsIt) {
MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(*MIArgsIt);
assert(LabelIt != LOHInstToLabel.end() &&
"Label hasn't been inserted for LOH related instruction");
MCArgs.push_back(LabelIt->second);
}
OutStreamer.EmitLOHDirective(It->getKind(), MCArgs);
MCArgs.clear();
}
}
void ARM64AsmPrinter::EmitFunctionBodyEnd() {
if (!ARM64FI->getLOHRelated().empty())
EmitLOHs();
}
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const {
// Darwin uses a linker-private symbol name for constant-pools (to
// avoid addends on the relocation?), ELF has no such concept and
// uses a normal private symbol.
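// For illustration only, the resulting name looks something like "lCPI7_3"
// with a Darwin-style linker-private prefix, versus ".LCPI7_3" with a typical
// ELF private prefix.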
if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
return OutContext.GetOrCreateSymbol(
Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
Twine(getFunctionNumber()) + "_" + Twine(CPID));
return OutContext.GetOrCreateSymbol(
Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
Twine(getFunctionNumber()) + "_" + Twine(CPID));
}
void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
default:
assert(0 && "<unknown operand type>");
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
assert(!MO.getSubReg() && "Subregs should be eliminated!");
O << ARM64InstPrinter::getRegisterName(Reg);
break;
}
case MachineOperand::MO_Immediate: {
int64_t Imm = MO.getImm();
O << '#' << Imm;
break;
}
}
}
bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
raw_ostream &O) {
unsigned Reg = MO.getReg();
switch (Mode) {
default:
return true; // Unknown mode.
case 'w':
Reg = getWRegFromXReg(Reg);
break;
case 'x':
Reg = getXRegFromWReg(Reg);
break;
}
O << ARM64InstPrinter::getRegisterName(Reg);
return false;
}
// Prints the register in MO using class RC, mapping via the register's
// encoding offset into the new register class. This should not be used for
// cross-class printing.
bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC,
bool isVector, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
const ARM64RegisterInfo *RI =
static_cast<const ARM64RegisterInfo *>(TM.getRegisterInfo());
unsigned Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
O << ARM64InstPrinter::getRegisterName(
RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName);
return false;
}
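// PrintAsmOperand handles GCC-style inline-asm operand modifiers; for example
// (illustrative template only) "add %w0, %w1, %w2" reaches the 'w' case below
// and prints the 32-bit form of the register.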
bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0)
return true; // Unknown modifier.
switch (ExtraCode[0]) {
default:
return true; // Unknown modifier.
case 'w': // Print W register
case 'x': // Print X register
if (MO.isReg())
return printAsmMRegister(MO, ExtraCode[0], O);
if (MO.isImm() && MO.getImm() == 0) {
unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR;
O << ARM64InstPrinter::getRegisterName(Reg);
return false;
}
printOperand(MI, OpNum, O);
return false;
case 'b': // Print B register.
case 'h': // Print H register.
case 's': // Print S register.
case 'd': // Print D register.
case 'q': // Print Q register.
if (MO.isReg()) {
const TargetRegisterClass *RC;
switch (ExtraCode[0]) {
case 'b':
RC = &ARM64::FPR8RegClass;
break;
case 'h':
RC = &ARM64::FPR16RegClass;
break;
case 's':
RC = &ARM64::FPR32RegClass;
break;
case 'd':
RC = &ARM64::FPR64RegClass;
break;
case 'q':
RC = &ARM64::FPR128RegClass;
break;
default:
return true;
}
return printAsmRegInClass(MO, RC, false /* vector */, O);
}
printOperand(MI, OpNum, O);
return false;
}
}
// According to ARM, we should emit x and v registers unless we have a
// modifier.
if (MO.isReg()) {
unsigned Reg = MO.getReg();
// If this is a w or x register, print an x register.
if (ARM64::GPR32allRegClass.contains(Reg) ||
ARM64::GPR64allRegClass.contains(Reg))
return printAsmMRegister(MO, 'x', O);
// If this is a b, h, s, d, or q register, print it as a v register.
return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O);
}
printOperand(MI, OpNum, O);
return false;
}
bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum, unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]";
return false;
}
void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
raw_ostream &OS) {
unsigned NOps = MI->getNumOperands();
assert(NOps == 4);
OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
// Cast away const; DIVariable etc. do not take const operands for some reason.
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
OS << V.getName();
OS << " <- ";
// Frame address. Currently handles register +- offset only.
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
OS << '[';
printOperand(MI, 0, OS);
OS << '+';
printOperand(MI, 1, OS);
OS << ']';
OS << "+";
printOperand(MI, NOps - 2, OS);
}
void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = MI.getOperand(1).getImm();
SM.recordStackMap(MI);
// Emit padding.
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
for (unsigned i = 0; i < NumNOPBytes; i += 4)
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
}
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
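// When a non-zero call target is present, the code below materializes it with
// three mov-immediate instructions into a scratch register followed by a BLR,
// i.e. 4 instructions * 4 bytes = the 16 EncodedBytes accounted for; the rest
// of <numBytes> is padded with NOP-encoded hint instructions.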
void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
SM.recordPatchPoint(MI);
PatchPointOpers Opers(&MI);
int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
unsigned EncodedBytes = 0;
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 16;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF)
.addImm(32));
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF)
.addImm(16));
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF)
.addImm(0));
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg));
}
// Emit padding.
unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
assert((NumBytes - EncodedBytes) % 4 == 0 &&
"Invalid number of NOP bytes requested!");
for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
}
// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.
#include "ARM64GenMCPseudoLowering.inc"
static unsigned getRealIndexedOpcode(unsigned Opc) {
switch (Opc) {
case ARM64::LDRXpre_isel: return ARM64::LDRXpre;
case ARM64::LDRWpre_isel: return ARM64::LDRWpre;
case ARM64::LDRDpre_isel: return ARM64::LDRDpre;
case ARM64::LDRSpre_isel: return ARM64::LDRSpre;
case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre;
case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre;
case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre;
case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre;
case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre;
case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre;
case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre;
case ARM64::LDRDpost_isel: return ARM64::LDRDpost;
case ARM64::LDRSpost_isel: return ARM64::LDRSpost;
case ARM64::LDRXpost_isel: return ARM64::LDRXpost;
case ARM64::LDRWpost_isel: return ARM64::LDRWpost;
case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost;
case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost;
case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost;
case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost;
case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost;
case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost;
case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost;
case ARM64::STRXpre_isel: return ARM64::STRXpre;
case ARM64::STRWpre_isel: return ARM64::STRWpre;
case ARM64::STRHHpre_isel: return ARM64::STRHHpre;
case ARM64::STRBBpre_isel: return ARM64::STRBBpre;
case ARM64::STRDpre_isel: return ARM64::STRDpre;
case ARM64::STRSpre_isel: return ARM64::STRSpre;
}
llvm_unreachable("Unexpected pre-indexed opcode!");
}
void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(OutStreamer, MI))
return;
if (ARM64FI->getLOHRelated().count(MI)) {
// Generate a label for LOH related instruction
MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
// Associate the instruction with the label
LOHInstToLabel[MI] = LOHLabel;
OutStreamer.EmitLabel(LOHLabel);
}
// Do any manual lowerings.
switch (MI->getOpcode()) {
default:
break;
case ARM64::DBG_VALUE: {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
SmallString<128> TmpStr;
raw_svector_ostream OS(TmpStr);
PrintDebugValueComment(MI, OS);
OutStreamer.EmitRawText(StringRef(OS.str()));
}
return;
}
// Indexed loads and stores use a pseudo to handle complex operand
// tricks and writeback to the base register. We strip off the writeback
// operand and switch the opcode here. Post-indexed stores were handled by the
// tablegen'erated pseudos above. (The complex operand <--> simple
// operand isel is beyond tablegen's ability, so we do these manually).
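// For the load pseudos handled here, operand 1 is the writeback (updated base)
// def, so the lowering below keeps operands 0 (data), 2 (base) and 3 (offset);
// the store pseudos skip operand 0 instead.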
case ARM64::LDRHHpre_isel:
case ARM64::LDRBBpre_isel:
case ARM64::LDRXpre_isel:
case ARM64::LDRWpre_isel:
case ARM64::LDRDpre_isel:
case ARM64::LDRSpre_isel:
case ARM64::LDRSBWpre_isel:
case ARM64::LDRSBXpre_isel:
case ARM64::LDRSHWpre_isel:
case ARM64::LDRSHXpre_isel:
case ARM64::LDRSWpre_isel:
case ARM64::LDRDpost_isel:
case ARM64::LDRSpost_isel:
case ARM64::LDRXpost_isel:
case ARM64::LDRWpost_isel:
case ARM64::LDRHHpost_isel:
case ARM64::LDRBBpost_isel:
case ARM64::LDRSWpost_isel:
case ARM64::LDRSHWpost_isel:
case ARM64::LDRSHXpost_isel:
case ARM64::LDRSBWpost_isel:
case ARM64::LDRSBXpost_isel: {
MCInst TmpInst;
// For loads, the writeback operand to be skipped is the second.
TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
EmitToStreamer(OutStreamer, TmpInst);
return;
}
case ARM64::STRXpre_isel:
case ARM64::STRWpre_isel:
case ARM64::STRHHpre_isel:
case ARM64::STRBBpre_isel:
case ARM64::STRDpre_isel:
case ARM64::STRSpre_isel: {
MCInst TmpInst;
// For stores, the writeback operand to be skipped is the first.
TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
EmitToStreamer(OutStreamer, TmpInst);
return;
}
// Tail calls use pseudo instructions so they have the proper code-gen
// attributes (isCall, isReturn, etc.). We lower them to the real
// instruction here.
case ARM64::TCRETURNri: {
MCInst TmpInst;
TmpInst.setOpcode(ARM64::BR);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
EmitToStreamer(OutStreamer, TmpInst);
return;
}
case ARM64::TCRETURNdi: {
MCOperand Dest;
MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
MCInst TmpInst;
TmpInst.setOpcode(ARM64::B);
TmpInst.addOperand(Dest);
EmitToStreamer(OutStreamer, TmpInst);
return;
}
case ARM64::TLSDESC_BLR: {
MCOperand Callee, Sym;
MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
// First emit a relocation annotation. This expands to no code, but requests
// that the following instruction get an R_AARCH64_TLSDESC_CALL relocation.
MCInst TLSDescCall;
TLSDescCall.setOpcode(ARM64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);
// Other than that it's just a normal indirect call to the function loaded
// from the descriptor.
MCInst BLR;
BLR.setOpcode(ARM64::BLR);
BLR.addOperand(Callee);
EmitToStreamer(OutStreamer, BLR);
return;
}
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(OutStreamer, SM, *MI);
case TargetOpcode::PATCHPOINT:
return LowerPATCHPOINT(OutStreamer, SM, *MI);
}
// Finally, do the automated lowerings for everything else.
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
EmitToStreamer(OutStreamer, TmpInst);
}
// Force static initialization.
extern "C" void LLVMInitializeARM64AsmPrinter() {
RegisterAsmPrinter<ARM64AsmPrinter> X(TheARM64Target);
}

View File

@ -0,0 +1,506 @@
//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-branch-relax"
#include "ARM64.h"
#include "ARM64InstrInfo.h"
#include "ARM64MachineFunctionInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
static cl::opt<bool>
BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true),
cl::desc("Relax out of range conditional branches"));
static cl::opt<unsigned>
TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14),
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned>
CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned>
BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
STATISTIC(NumSplit, "Number of basic blocks split");
STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
namespace {
class ARM64BranchRelaxation : public MachineFunctionPass {
/// BasicBlockInfo - Information about the offset and size of a single
/// basic block.
struct BasicBlockInfo {
/// Offset - Distance from the beginning of the function to the beginning
/// of this basic block.
///
/// The offset is always aligned as required by the basic block.
unsigned Offset;
/// Size - Size of the basic block in bytes. If the block contains
/// inline assembly, this is a worst case estimate.
///
/// The size does not include any alignment padding whether from the
/// beginning of the block, or from an aligned jump table at the end.
unsigned Size;
BasicBlockInfo() : Offset(0), Size(0) {}
/// Compute the offset immediately following this block. If LogAlign is
/// specified, return the offset the successor block will get if it has
/// this alignment.
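/// For example (illustrative numbers): Offset = 0x104, Size = 0x8 and
/// LogAlign = 4 give postOffset() = 0x110, i.e. 0x10c rounded up to a
/// 16-byte boundary.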
unsigned postOffset(unsigned LogAlign = 0) const {
unsigned PO = Offset + Size;
unsigned Align = 1 << LogAlign;
return (PO + Align - 1) / Align * Align;
}
};
SmallVector<BasicBlockInfo, 16> BlockInfo;
MachineFunction *MF;
const ARM64InstrInfo *TII;
bool relaxBranchInstructions();
void scanFunction();
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
void adjustBlockOffsets(MachineBasicBlock *BB);
bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
bool fixupConditionalBranch(MachineInstr *MI);
void computeBlockSize(MachineBasicBlock *MBB);
unsigned getInstrOffset(MachineInstr *MI) const;
void dumpBBs();
void verify();
public:
static char ID;
ARM64BranchRelaxation() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
return "ARM64 branch relaxation pass";
}
};
char ARM64BranchRelaxation::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARM64BranchRelaxation::verify() {
#ifndef NDEBUG
unsigned PrevNum = MF->begin()->getNumber();
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
++MBBI) {
MachineBasicBlock *MBB = MBBI;
unsigned Align = MBB->getAlignment();
unsigned Num = MBB->getNumber();
assert(BlockInfo[Num].Offset % (1u << Align) == 0);
assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
PrevNum = Num;
}
#endif
}
/// print block size and offset information - debugging
void ARM64BranchRelaxation::dumpBBs() {
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
++MBBI) {
const BasicBlockInfo &BBI = BlockInfo[MBBI->getNumber()];
dbgs() << format("BB#%u\toffset=%08x\t", MBBI->getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
}
}
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
// Can't fall off end of function.
if (std::next(MBBI) == MBB->getParent()->end())
return false;
MachineBasicBlock *NextBB = std::next(MBBI);
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end();
I != E; ++I)
if (*I == NextBB)
return true;
return false;
}
/// scanFunction - Do the initial scan of the function, building up
/// information about each block.
void ARM64BranchRelaxation::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
// First thing, compute the size of all basic blocks, and see if the function
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
computeBlockSize(I);
// Compute block offsets and known bits.
adjustBlockOffsets(MF->begin());
}
/// computeBlockSize - Compute the size for MBB.
/// This function updates BlockInfo directly.
void ARM64BranchRelaxation::computeBlockSize(MachineBasicBlock *MBB) {
unsigned Size = 0;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I)
Size += TII->GetInstSizeInBytes(I);
BlockInfo[MBB->getNumber()].Size = Size;
}
/// getInstrOffset - Return the current offset of the specified machine
/// instruction from the start of the function. This offset changes as stuff is
/// moved around inside the function.
unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
Offset += TII->GetInstSizeInBytes(I);
}
return Offset;
}
void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) {
unsigned PrevNum = Start->getNumber();
MachineFunction::iterator MBBI = Start, E = MF->end();
for (++MBBI; MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
unsigned Num = MBB->getNumber();
if (!Num) // block zero is never changed from offset zero.
continue;
// Get the offset and known bits at the end of the layout predecessor.
// Include the alignment of the current block.
unsigned LogAlign = MBBI->getAlignment();
BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
PrevNum = Num;
}
}
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
/// NOTE: Successor list of the original BB is out of date after this function,
/// and must be updated by the caller! Other transforms that follow use this
/// utility function, so there is no point updating it now rather than waiting.
MachineBasicBlock *
ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB;
++MBBI;
MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
// Add an unconditional branch from OrigBB to NewBB.
// Note the new unconditional branch is not being recorded.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond to anything in the source.
BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB);
// Insert an entry into BlockInfo to align it properly with the block numbers.
BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
computeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
computeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
adjustBlockOffsets(OrigBB);
++NumSplit;
return NewBB;
}
/// isBlockInRange - Returns true if the distance between the specified MI and
/// the specified BB can fit in MI's displacement field.
bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI,
MachineBasicBlock *DestBB,
unsigned Bits) {
unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
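// E.g. Bits == 19 gives MaxOffs = ((1 << 18) - 1) << 2 = 0xffffc, i.e. just
// under 1 MiB of reach in either direction.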
unsigned BrOffset = getInstrOffset(MI);
unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxOffs << " from " << getInstrOffset(MI)
<< " to " << DestOffset << " offset "
<< int(DestOffset - BrOffset) << "\t" << *MI);
// Branch before the Dest.
if (BrOffset <= DestOffset)
return (DestOffset - BrOffset <= MaxOffs);
return (BrOffset - DestOffset <= MaxOffs);
}
static bool isConditionalBranch(unsigned Opc) {
switch (Opc) {
default:
return false;
case ARM64::TBZ:
case ARM64::TBNZ:
case ARM64::CBZW:
case ARM64::CBNZW:
case ARM64::CBZX:
case ARM64::CBNZX:
case ARM64::Bcc:
return true;
}
}
static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
assert(0 && "unexpected opcode!");
case ARM64::TBZ:
case ARM64::TBNZ:
return MI->getOperand(2).getMBB();
case ARM64::CBZW:
case ARM64::CBNZW:
case ARM64::CBZX:
case ARM64::CBNZX:
case ARM64::Bcc:
return MI->getOperand(1).getMBB();
}
}
static unsigned getOppositeConditionOpcode(unsigned Opc) {
switch (Opc) {
default:
assert(0 && "unexpected opcode!");
case ARM64::TBNZ: return ARM64::TBZ;
case ARM64::TBZ: return ARM64::TBNZ;
case ARM64::CBNZW: return ARM64::CBZW;
case ARM64::CBNZX: return ARM64::CBZX;
case ARM64::CBZW: return ARM64::CBNZW;
case ARM64::CBZX: return ARM64::CBNZX;
case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc.
}
}
static unsigned getBranchDisplacementBits(unsigned Opc) {
switch (Opc) {
default:
assert(0 && "unexpected opcode!");
case ARM64::TBNZ:
case ARM64::TBZ:
return TBZDisplacementBits;
case ARM64::CBNZW:
case ARM64::CBZW:
case ARM64::CBNZX:
case ARM64::CBZX:
return CBZDisplacementBits;
case ARM64::Bcc:
return BCCDisplacementBits;
}
}
static inline void invertBccCondition(MachineInstr *MI) {
assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!");
ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm();
CC = ARM64CC::getInvertedCondCode(CC);
MI->getOperand(0).setImm((int64_t)CC);
}
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
MachineBasicBlock *DestBB = getDestBlock(MI);
// Add an unconditional branch to the destination and invert the branch
// condition to jump over it:
// tbz L1
// =>
// tbnz L2
// b L1
// L2:
// If the branch is at the end of its MBB and that has a fall-through block,
// direct the updated conditional branch to the fall-through block. Otherwise,
// split the MBB before the next instruction.
MachineBasicBlock *MBB = MI->getParent();
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
if (BMI != MI) {
if (std::next(MachineBasicBlock::iterator(MI)) ==
std::prev(MBB->getLastNonDebugInstr()) &&
BMI->getOpcode() == ARM64::B) {
// Last MI in the BB is an unconditional branch. Can we simply invert the
// condition and swap destinations:
// beq L1
// b L2
// =>
// bne L2
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
if (isBlockInRange(MI, NewDest,
getBranchDisplacementBits(MI->getOpcode()))) {
DEBUG(dbgs() << " Invert condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
unsigned OpNum =
(MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
? 2
: 1;
MI->getOperand(OpNum).setMBB(NewDest);
MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode())));
if (MI->getOpcode() == ARM64::Bcc)
invertBccCondition(MI);
return true;
}
}
}
if (NeedSplit) {
// Analyze the branch so we know how to update the successor lists.
MachineBasicBlock *TBB, *FBB;
SmallVector<MachineOperand, 2> Cond;
TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false);
MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
BlockInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
// BlockInfo[SplitBB].Offset is wrong temporarily, fixed below
// Update the successor lists according to the transformation to follow.
// Do it here since if there's no split, no update is needed.
MBB->replaceSuccessor(FBB, NewBB);
NewBB->addSuccessor(FBB);
}
MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< ", invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
// Insert a new conditional branch and a new unconditional branch.
MachineInstrBuilder MIB = BuildMI(
MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode())))
.addOperand(MI->getOperand(0));
if (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
MIB.addOperand(MI->getOperand(1));
if (MI->getOpcode() == ARM64::Bcc)
invertBccCondition(MIB);
MIB.addMBB(NextBB);
BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB);
BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
// Remove the old conditional branch. It may or may not still be in MBB.
BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
// Finally, keep the block offsets up to date.
adjustBlockOffsets(MBB);
return true;
}
bool ARM64BranchRelaxation::relaxBranchInstructions() {
bool Changed = false;
// Relaxing branches involves creating new basic blocks, so re-evaluate
// end() for termination.
for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
MachineInstr *MI = I->getFirstTerminator();
if (isConditionalBranch(MI->getOpcode()) &&
!isBlockInRange(MI, getDestBlock(MI),
getBranchDisplacementBits(MI->getOpcode()))) {
fixupConditionalBranch(MI);
++NumRelaxed;
Changed = true;
}
}
return Changed;
}
bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
// If the pass is disabled, just bail early.
if (!BranchRelaxation)
return false;
DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n");
TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
MF->RenumberBlocks();
// Do the initial scan of the function, building up information about the
// sizes of each block.
scanFunction();
DEBUG(dbgs() << " Basic blocks before relaxation\n");
DEBUG(dumpBBs());
bool MadeChange = false;
while (relaxBranchInstructions())
MadeChange = true;
// After a while, this might be made debug-only, but it is not expensive.
verify();
DEBUG(dbgs() << " Basic blocks after relaxation\n");
DEBUG(dbgs() << '\n'; dumpBBs());
BlockInfo.clear();
return MadeChange;
}
/// createARM64BranchRelaxation - returns an instance of the branch relaxation
/// pass.
FunctionPass *llvm::createARM64BranchRelaxation() {
return new ARM64BranchRelaxation();
}

View File

@ -0,0 +1,94 @@
//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the custom routines for the ARM64 Calling Convention that
// aren't done by tablegen.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64CALLINGCONV_H
#define ARM64CALLINGCONV_H
#include "ARM64InstrInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via
/// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the
/// argument is already promoted and LocVT is i1/i8/i16. We only promote the
/// argument to i32 if we are sure this argument will be passed in register.
static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags,
CCState &State,
bool IsWebKitJS = false) {
static const uint16_t RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2,
ARM64::W3, ARM64::W4, ARM64::W5,
ARM64::W6, ARM64::W7 };
static const uint16_t RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2,
ARM64::X3, ARM64::X4, ARM64::X5,
ARM64::X6, ARM64::X7 };
static const uint16_t WebKitRegList1[] = { ARM64::W0 };
static const uint16_t WebKitRegList2[] = { ARM64::X0 };
const uint16_t *List1 = IsWebKitJS ? WebKitRegList1 : RegList1;
const uint16_t *List2 = IsWebKitJS ? WebKitRegList2 : RegList2;
if (unsigned Reg = State.AllocateReg(List1, List2, 8)) {
// Customized extra section for handling i1/i8/i16:
// We need to promote the argument to i32 if it is not done already.
if (ValVT != MVT::i32) {
if (ArgFlags.isSExt())
LocInfo = CCValAssign::SExt;
else if (ArgFlags.isZExt())
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
ValVT = MVT::i32;
}
// Set LocVT to i32 as well if passing via register.
LocVT = MVT::i32;
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return true;
}
return false;
}
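// Illustrative example (hypothetical C source, not part of this code): for
// "void f(signed char c)", assuming the frontend marks the parameter signext
// (as Darwin's ABI does), the incoming i8 reaches this hook, LocInfo becomes
// SExt, and the value is recorded as an i32 in W0. If all eight W registers
// are already taken, the hook returns false and the stack-assignment actions
// in the .td file take over.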
/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16
/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but only
/// uses the first register.
static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags,
CCState &State) {
return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
State, true);
}
/// CC_ARM64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on
/// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument
/// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted,
/// it will be truncated back to i1/i8/i16.
static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags,
CCState &State) {
unsigned Space = ((LocVT == MVT::i1 || LocVT == MVT::i8) ? 1 : 2);
unsigned Offset12 = State.AllocateStack(Space, Space);
ValVT = LocVT;
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset12, LocVT, LocInfo));
return true;
}
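// Illustrative example (hypothetical, not part of this code): a ninth i8
// argument that no longer fits in W0-W7 lands here and receives a 1-byte
// stack slot with 1-byte alignment (an i16 gets 2 bytes), whereas the generic
// AAPCS rules in the .td file give the same argument a full 8-byte slot via
// CCAssignToStack<8, 8>.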
} // End llvm namespace
#endif

View File

@ -0,0 +1,210 @@
//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for ARM64 architecture.
//
//===----------------------------------------------------------------------===//
/// CCIfAlign - Apply the action A if the original alignment of the argument
/// equals Align.
class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
def CC_ARM64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
// An SRet is passed in X8, not X0 like a normal pointer parameter.
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split into two i64s; the first half can't go in X7 because the
// second half would have no register left.
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
[X0, X1, X3, X5]>>>,
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>,
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
]>;
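// Illustrative example (not normative): a call passing nine i64 arguments
// assigns the first eight to X0-X7 through the rules above; the ninth matches
// no register action and falls through to CCAssignToStack<8, 8>, i.e. an
// 8-byte stack slot at 8-byte alignment.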
def RetCC_ARM64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;
// Darwin uses a calling convention which differs in only two ways
// from the standard one at this level:
// + i128s (i.e. split i64s) don't need even registers.
// + Stack slots are sized as needed rather than being at least 64-bit.
def CC_ARM64_DarwinPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
// An SRet is passed in X8, not X0 like a normal pointer parameter.
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split into two i64s; the first half can't go in X7 because the
// second half would have no register left.
CCIfType<[i64],
CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
[W0, W1, W2, W3, W4, W5, W6]>>>,
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
]>;
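// Illustrative comparison (not normative): the two deviations above show up
// with a call passing an i64 followed by an i128. Under AAPCS the split i128
// must start at an even register, so it lands in X2/X3 and X1 is skipped;
// under DarwinPCS it may start at the odd register and occupies X1/X2.
// Likewise an i8 argument spilled to the stack takes an 8-byte slot under
// AAPCS but only a single byte under DarwinPCS.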
def CC_ARM64_DarwinPCS_VarArg : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
// Handle all scalar types as either i64 or f64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
CCIfType<[f32], CCPromoteToType<f64>>,
// Everything is on the stack.
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
]>;
// The WebKit_JS calling convention passes only the first argument (the callee)
// in a register; the remaining arguments go on the stack. We allow 32-bit
// stack slots so that WebKit can write partial values to the stack and leave
// the other 32-bit quantity undef.
def CC_ARM64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
// Pass the remaining arguments on the stack instead.
CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
def RetCC_ARM64_WebKit_JS : CallingConv<[
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a
// tail call doesn't matter.
//
// It would be better to model its preservation semantics properly (create a
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
// end up saving LR as part of a call frame). Watch this space...
def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
X23, X24, X25, X26, X27, X28,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
// only the resulting RegMask is used; the SaveList is ignored
//
// (For generic ARM 64-bit ABI code, clang will not generate constructors or
// destructors with 'this' returns, so this RegMask will not be used in that
// case)
def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
// fast path for calculation, but other registers except X0 (argument/return)
// and LR (it is a call, after all) are preserved.
def CSR_ARM64_TLS_Darwin
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
FP,
(sequence "Q%u", 0, 31))>;
// The ELF stub used for TLS-descriptor access saves every feasible
// register. Only X0 and LR are clobbered.
def CSR_ARM64_TLS_ELF
: CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP,
(sequence "Q%u", 0, 31))>;
def CSR_ARM64_AllRegs
: CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP,
(sequence "X%u", 0, 28), FP, LR, SP,
(sequence "B%u", 0, 31), (sequence "H%u", 0, 31),
(sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
(sequence "Q%u", 0, 31))>;

View File

@ -0,0 +1,148 @@
//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Local-dynamic access to thread-local variables proceeds in three stages.
//
// 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated
// in much the same way as a general-dynamic TLS-descriptor access against
// the special symbol _TLS_MODULE_BASE_.
// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using
// instructions with "dtprel" modifiers.
// 3. These two are added, together with TPIDR_EL0, to obtain the variable's
// true address.
//
// This is only better than general-dynamic access to the variable if two or
// more of the first stage TLS-descriptor calculations can be combined. This
// pass looks through a function and performs such combinations.
//
//===----------------------------------------------------------------------===//
#include "ARM64.h"
#include "ARM64InstrInfo.h"
#include "ARM64MachineFunctionInfo.h"
#include "ARM64TargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
struct LDTLSCleanup : public MachineFunctionPass {
static char ID;
LDTLSCleanup() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
// No point folding accesses if there aren't at least two.
return false;
}
MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
return VisitNode(DT->getRootNode(), 0);
}
// Visit the dominator subtree rooted at Node in pre-order.
// If TLSBaseAddrReg is non-null, then use that to replace any
// TLS_base_addr instructions. Otherwise, create the register
// when the first such instruction is seen, and then use it
// as we encounter more instructions.
bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
MachineBasicBlock *BB = Node->getBlock();
bool Changed = false;
// Traverse the current block.
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
case ARM64::TLSDESC_BLR:
// Make sure it's a local dynamic access.
if (!I->getOperand(1).isSymbol() ||
strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
break;
if (TLSBaseAddrReg)
I = replaceTLSBaseAddrCall(I, TLSBaseAddrReg);
else
I = setRegister(I, &TLSBaseAddrReg);
Changed = true;
break;
default:
break;
}
}
// Visit the children of this block in the dominator tree.
for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
I != E; ++I) {
Changed |= VisitNode(*I, TLSBaseAddrReg);
}
return Changed;
}
// Replace the TLS_base_addr instruction I with a copy from
// TLSBaseAddrReg, returning the new instruction.
MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
unsigned TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
const ARM64TargetMachine *TM =
static_cast<const ARM64TargetMachine *>(&MF->getTarget());
const ARM64InstrInfo *TII = TM->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
// code sequence assumes the address will be.
MachineInstr *Copy =
BuildMI(*I->getParent(), I, I->getDebugLoc(),
TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg);
// Erase the TLS_base_addr instruction.
I->eraseFromParent();
return Copy;
}
// Create a virtual register in *TLSBaseAddrReg, and populate it by
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
const ARM64TargetMachine *TM =
static_cast<const ARM64TargetMachine *>(&MF->getTarget());
const ARM64InstrInfo *TII = TM->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
*TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
// Insert a copy from X0 to TLSBaseAddrReg for later.
MachineInstr *Next = I->getNextNode();
MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
TII->get(TargetOpcode::COPY),
*TLSBaseAddrReg).addReg(ARM64::X0);
return Copy;
}
virtual const char *getPassName() const {
return "Local Dynamic TLS Access Clean-up";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
}
char LDTLSCleanup::ID = 0;
FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() {
return new LDTLSCleanup();
}

File diff suppressed because it is too large

View File

@ -0,0 +1,918 @@
//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM64ConditionalCompares pass which reduces
// branching and code size by using the conditional compare instructions CCMP,
// CCMN, and FCMP.
//
// The CFG transformations for forming conditional compares are very similar to
// if-conversion, and this pass should run immediately before the early
// if-conversion pass.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-ccmp"
#include "ARM64.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
// Absolute maximum number of instructions allowed per speculated block.
// This bypasses all other heuristics, so it should be set fairly high.
static cl::opt<unsigned> BlockInstrLimit(
"arm64-ccmp-limit", cl::init(30), cl::Hidden,
cl::desc("Maximum number of instructions per speculated block."));
// Stress testing mode - disable heuristics.
static cl::opt<bool> Stress("arm64-stress-ccmp", cl::Hidden,
cl::desc("Turn all knobs to 11"));
STATISTIC(NumConsidered, "Number of ccmps considered");
STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
STATISTIC(NumMultCPSRUses, "Number of ccmps rejected (CPSR used)");
STATISTIC(NumUnknCPSRDefs, "Number of ccmps rejected (CPSR def unknown)");
STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
STATISTIC(NumConverted, "Number of ccmp instructions created");
STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
//===----------------------------------------------------------------------===//
// SSACCmpConv
//===----------------------------------------------------------------------===//
//
// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
// after determining if it is possible. The class contains no heuristics;
// external code should be used to determine when ccmp-conversion is a good
// idea.
//
// CCmp-formation works on a CFG representing chained conditions, typically
// from C's short-circuit || and && operators:
//
// From: Head To: Head
// / | CmpBB
// / | / |
// | CmpBB / |
// | / | Tail |
// | / | | |
// Tail | | |
// | | | |
// ... ... ... ...
//
// The Head block is terminated by a br.cond instruction, and the CmpBB block
// contains compare + br.cond. Tail must be a successor of both.
//
// The cmp-conversion turns the compare instruction in CmpBB into a conditional
// compare, and merges CmpBB into Head, speculatively executing its
// instructions. The ARM64 conditional compare instructions have an immediate
// operand that specifies the NZCV flag values when the condition is false and
// the compare isn't executed. This makes it possible to chain compares with
// different condition codes.
//
// Example:
//
// if (a == 5 || b == 17)
// foo();
//
// Head:
// cmp w0, #5
// b.eq Tail
// CmpBB:
// cmp w1, #17
// b.eq Tail
// ...
// Tail:
// bl _foo
//
// Becomes:
//
// Head:
// cmp w0, #5
// ccmp w1, #17, 4, ne ; 4 = nZcv
// b.eq Tail
// ...
// Tail:
// bl _foo
//
// The ccmp condition code is the one that would cause the Head terminator to
// branch to CmpBB.
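//
// The "4" in "ccmp w1, #17, 4, ne" is the NZCV immediate, with bits N=8, Z=4,
// C=2, V=1; it is produced by getNZCVToSatisfyCondCode(EQ) below. When the
// ccmp condition (ne) is false, i.e. a == 5 and the second compare is not
// performed, the flags are forced to Z=1 so that the final b.eq still branches
// to Tail, preserving the original control flow.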
//
// FIXME: It should also be possible to speculate a block on the critical edge
// between Head and Tail, just like if-converting a diamond.
//
// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
namespace {
class SSACCmpConv {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
public:
/// The first block containing a conditional branch, dominating everything
/// else.
MachineBasicBlock *Head;
/// The block containing cmp+br.cond with a successor shared with Head.
MachineBasicBlock *CmpBB;
/// The common successor for Head and CmpBB.
MachineBasicBlock *Tail;
/// The compare instruction in CmpBB that can be converted to a ccmp.
MachineInstr *CmpMI;
private:
/// The branch condition in Head as determined by AnalyzeBranch.
SmallVector<MachineOperand, 4> HeadCond;
/// The condition code that makes Head branch to CmpBB.
ARM64CC::CondCode HeadCmpBBCC;
/// The branch condition in CmpBB.
SmallVector<MachineOperand, 4> CmpBBCond;
/// The condition code that makes CmpBB branch to Tail.
ARM64CC::CondCode CmpBBTailCC;
/// Check if the Tail PHIs are trivially convertible.
bool trivialTailPHIs();
/// Remove CmpBB from the Tail PHIs.
void updateTailPHIs();
/// Check if an operand defining DstReg is dead.
bool isDeadDef(unsigned DstReg);
/// Find the compare instruction in MBB that controls the conditional branch.
/// Return NULL if a convertible instruction can't be found.
MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
/// Return true if all non-terminator instructions in MBB can be safely
/// speculated.
bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
public:
/// runOnMachineFunction - Initialize per-function data structures.
void runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
MRI = &MF.getRegInfo();
}
/// If the sub-CFG headed by MBB can be cmp-converted, initialize the
/// internal state, and return true.
bool canConvert(MachineBasicBlock *MBB);
/// Cmp-convert the last block passed to canConvert(), assuming
/// it is possible. Add any erased blocks to RemovedBlocks.
void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
/// Return the expected code size delta if the conversion into a
/// conditional compare is performed.
int expectedCodeSizeDelta() const;
};
} // end anonymous namespace
// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
// This means that no if-conversion is required when merging CmpBB into Head.
bool SSACCmpConv::trivialTailPHIs() {
for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
I != E && I->isPHI(); ++I) {
unsigned HeadReg = 0, CmpBBReg = 0;
// PHI operands come in (VReg, MBB) pairs.
for (unsigned oi = 1, oe = I->getNumOperands(); oi != oe; oi += 2) {
MachineBasicBlock *MBB = I->getOperand(oi + 1).getMBB();
unsigned Reg = I->getOperand(oi).getReg();
if (MBB == Head) {
assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
HeadReg = Reg;
}
if (MBB == CmpBB) {
assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
CmpBBReg = Reg;
}
}
if (HeadReg != CmpBBReg)
return false;
}
return true;
}
// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
// removing the CmpBB operands. The Head operands will be identical.
void SSACCmpConv::updateTailPHIs() {
for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
I != E && I->isPHI(); ++I) {
// I is a PHI. It can have multiple entries for CmpBB.
for (unsigned oi = I->getNumOperands(); oi > 2; oi -= 2) {
// PHI operands are (Reg, MBB) at (oi-2, oi-1).
if (I->getOperand(oi - 1).getMBB() == CmpBB) {
I->RemoveOperand(oi - 1);
I->RemoveOperand(oi - 2);
}
}
}
}
// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are
// still writing virtual registers without any uses.
bool SSACCmpConv::isDeadDef(unsigned DstReg) {
// Writes to the zero register are dead.
if (DstReg == ARM64::WZR || DstReg == ARM64::XZR)
return true;
if (!TargetRegisterInfo::isVirtualRegister(DstReg))
return false;
// A virtual register def without any uses will be marked dead later, and
// eventually replaced by the zero register.
return MRI->use_nodbg_empty(DstReg);
}
// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
// corresponding to TBB.
// Returns true if the condition could be parsed, false otherwise.
bool parseCond(ArrayRef<MachineOperand> Cond, ARM64CC::CondCode &CC) {
// A normal br.cond simply has the condition code.
if (Cond[0].getImm() != -1) {
assert(Cond.size() == 1 && "Unknown Cond array format");
CC = (ARM64CC::CondCode)(int)Cond[0].getImm();
return true;
}
// For tbz and cbz instruction, the opcode is next.
switch (Cond[1].getImm()) {
default:
// This includes tbz / tbnz branches which can't be converted to
// ccmp + br.cond.
return false;
case ARM64::CBZW:
case ARM64::CBZX:
assert(Cond.size() == 3 && "Unknown Cond array format");
CC = ARM64CC::EQ;
return true;
case ARM64::CBNZW:
case ARM64::CBNZX:
assert(Cond.size() == 3 && "Unknown Cond array format");
CC = ARM64CC::NE;
return true;
}
}
MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator I = MBB->getFirstTerminator();
if (I == MBB->end())
return 0;
// The terminator must be controlled by the flags.
if (!I->readsRegister(ARM64::CPSR)) {
switch (I->getOpcode()) {
case ARM64::CBZW:
case ARM64::CBZX:
case ARM64::CBNZW:
case ARM64::CBNZX:
// These can be converted into a ccmp against #0.
return I;
}
++NumCmpTermRejs;
DEBUG(dbgs() << "Flags not used by terminator: " << *I);
return 0;
}
// Now find the instruction controlling the terminator.
for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
--I;
assert(!I->isTerminator() && "Spurious terminator");
switch (I->getOpcode()) {
// cmp is an alias for subs with a dead destination register.
case ARM64::SUBSWri:
case ARM64::SUBSXri:
// cmn is an alias for adds with a dead destination register.
case ARM64::ADDSWri:
case ARM64::ADDSXri:
// Check that the immediate operand is within range, ccmp wants a uimm5.
// Rd = SUBSri Rn, imm, shift
if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
++NumImmRangeRejs;
return 0;
}
// Fall through.
case ARM64::SUBSWrr:
case ARM64::SUBSXrr:
case ARM64::ADDSWrr:
case ARM64::ADDSXrr:
if (isDeadDef(I->getOperand(0).getReg()))
return I;
DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
++NumLiveDstRejs;
return 0;
case ARM64::FCMPSrr:
case ARM64::FCMPDrr:
case ARM64::FCMPESrr:
case ARM64::FCMPEDrr:
return I;
}
// Check for flag reads and clobbers.
MIOperands::PhysRegInfo PRI =
MIOperands(I).analyzePhysReg(ARM64::CPSR, TRI);
if (PRI.Reads) {
// The ccmp doesn't produce exactly the same flags as the original
// compare, so reject the transform if there are uses of the flags
// besides the terminators.
DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
++NumMultCPSRUses;
return 0;
}
if (PRI.Clobbers) {
DEBUG(dbgs() << "Not convertible compare: " << *I);
++NumUnknCPSRDefs;
return 0;
}
}
DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
return 0;
}
/// Determine if all the instructions in MBB can safely
/// be speculated. The terminators are not considered.
///
/// Only CmpMI is allowed to clobber the flags.
///
bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
const MachineInstr *CmpMI) {
// Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
// get right.
if (!MBB->livein_empty()) {
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
return false;
}
unsigned InstrCount = 0;
// Check all instructions, except the terminators. It is assumed that
// terminators never have side effects or define any used register values.
for (MachineBasicBlock::iterator I = MBB->begin(),
E = MBB->getFirstTerminator();
I != E; ++I) {
if (I->isDebugValue())
continue;
if (++InstrCount > BlockInstrLimit && !Stress) {
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
<< BlockInstrLimit << " instructions.\n");
return false;
}
// There shouldn't normally be any phis in a single-predecessor block.
if (I->isPHI()) {
DEBUG(dbgs() << "Can't hoist: " << *I);
return false;
}
// Don't speculate loads. Note that it may be possible and desirable to
// speculate GOT or constant pool loads that are guaranteed not to trap,
// but we don't support that for now.
if (I->mayLoad()) {
DEBUG(dbgs() << "Won't speculate load: " << *I);
return false;
}
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
DEBUG(dbgs() << "Can't speculate: " << *I);
return false;
}
// Only CmpMI is allowed to clobber the flags.
if (&*I != CmpMI && I->modifiesRegister(ARM64::CPSR, TRI)) {
DEBUG(dbgs() << "Clobbers flags: " << *I);
return false;
}
}
return true;
}
/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
/// candidate for cmp-conversion. Fill out the internal state.
///
bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
Head = MBB;
Tail = CmpBB = 0;
if (Head->succ_size() != 2)
return false;
MachineBasicBlock *Succ0 = Head->succ_begin()[0];
MachineBasicBlock *Succ1 = Head->succ_begin()[1];
// CmpBB can only have a single predecessor. Tail is allowed many.
if (Succ0->pred_size() != 1)
std::swap(Succ0, Succ1);
// Succ0 is our candidate for CmpBB.
if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
return false;
CmpBB = Succ0;
Tail = Succ1;
if (!CmpBB->isSuccessor(Tail))
return false;
// The CFG topology checks out.
DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#"
<< CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n');
++NumConsidered;
// Tail is allowed to have many predecessors, but we can't handle PHIs yet.
//
// FIXME: Real PHIs could be if-converted as long as the CmpBB values are
// defined before the CmpBB cmp clobbers the flags. Alternatively, it should
// always be safe to sink the ccmp down to immediately before the CmpBB
// terminators.
if (!trivialTailPHIs()) {
DEBUG(dbgs() << "Can't handle phis in Tail.\n");
++NumPhiRejs;
return false;
}
if (!Tail->livein_empty()) {
DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
++NumPhysRejs;
return false;
}
// CmpBB should never have PHIs since Head is its only predecessor.
// FIXME: Clean them up if it happens.
if (!CmpBB->empty() && CmpBB->front().isPHI()) {
DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
++NumPhi2Rejs;
return false;
}
if (!CmpBB->livein_empty()) {
DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
++NumPhysRejs;
return false;
}
// The branch we're looking to eliminate must be analyzable.
HeadCond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0;
if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) {
DEBUG(dbgs() << "Head branch not analyzable.\n");
++NumHeadBranchRejs;
return false;
}
// This is weird, probably some sort of degenerate CFG, or an edge to a
// landing pad.
if (!TBB || HeadCond.empty()) {
DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
++NumHeadBranchRejs;
return false;
}
if (!parseCond(HeadCond, HeadCmpBBCC)) {
DEBUG(dbgs() << "Unsupported branch type on Head\n");
++NumHeadBranchRejs;
return false;
}
// Make sure the branch direction is right.
if (TBB != CmpBB) {
assert(TBB == Tail && "Unexpected TBB");
HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC);
}
CmpBBCond.clear();
TBB = FBB = 0;
if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
++NumCmpBranchRejs;
return false;
}
if (!TBB || CmpBBCond.empty()) {
DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
++NumCmpBranchRejs;
return false;
}
if (!parseCond(CmpBBCond, CmpBBTailCC)) {
DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
++NumCmpBranchRejs;
return false;
}
if (TBB != Tail)
CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC);
DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC)
<< ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC)
<< '\n');
CmpMI = findConvertibleCompare(CmpBB);
if (!CmpMI)
return false;
if (!canSpeculateInstrs(CmpBB, CmpMI)) {
++NumSpeculateRejs;
return false;
}
return true;
}
void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
<< Head->getNumber() << ":\n" << *CmpBB);
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
// Update the CFG first.
updateTailPHIs();
Head->removeSuccessor(CmpBB);
CmpBB->removeSuccessor(Tail);
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
TII->RemoveBranch(*Head);
// If the Head terminator was one of the cbz / tbz branches with built-in
// compare, we need to insert an explicit compare instruction in its place.
if (HeadCond[0].getImm() == -1) {
++NumCompBranches;
unsigned Opc = 0;
switch (HeadCond[1].getImm()) {
case ARM64::CBZW:
case ARM64::CBNZW:
Opc = ARM64::SUBSWri;
break;
case ARM64::CBZX:
case ARM64::CBNZX:
Opc = ARM64::SUBSXri;
break;
default:
llvm_unreachable("Cannot convert Head branch");
}
const MCInstrDesc &MCID = TII->get(Opc);
// Create a dummy virtual register for the SUBS def.
unsigned DestReg =
MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
BuildMI(*Head, Head->end(), TermDL, MCID)
.addReg(DestReg, RegState::Define | RegState::Dead)
.addOperand(HeadCond[2])
.addImm(0)
.addImm(0);
// SUBS uses the GPR*sp register classes.
MRI->constrainRegClass(HeadCond[2].getReg(),
TII->getRegClass(MCID, 1, TRI, *MF));
}
Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
// Now replace CmpMI with a ccmp instruction that also considers the incoming
// flags.
unsigned Opc = 0;
unsigned FirstOp = 1; // First CmpMI operand to copy.
bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
switch (CmpMI->getOpcode()) {
default:
llvm_unreachable("Unknown compare opcode");
case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break;
case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break;
case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break;
case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break;
case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break;
case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break;
case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break;
case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break;
case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break;
case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break;
case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break;
case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break;
case ARM64::CBZW:
case ARM64::CBNZW:
Opc = ARM64::CCMPWi;
FirstOp = 0;
isZBranch = true;
break;
case ARM64::CBZX:
case ARM64::CBNZX:
Opc = ARM64::CCMPXi;
FirstOp = 0;
isZBranch = true;
break;
}
// The ccmp instruction should set the flags according to the comparison when
// Head would have branched to CmpBB.
// The NZCV immediate operand should provide flags for the case where Head
// would have branched to Tail. These flags should cause the new Head
// terminator to branch to tail.
unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
const MCInstrDesc &MCID = TII->get(Opc);
MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
TII->getRegClass(MCID, 0, TRI, *MF));
if (CmpMI->getOperand(FirstOp + 1).isReg())
MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
TII->getRegClass(MCID, 1, TRI, *MF));
MachineInstrBuilder MIB =
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
.addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
if (isZBranch)
MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
else
MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
MIB.addImm(NZCV).addImm(HeadCmpBBCC);
// If CmpMI was a terminator, we need a new conditional branch to replace it.
// This now becomes a Head terminator.
if (isZBranch) {
bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW ||
CmpMI->getOpcode() == ARM64::CBNZX;
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc))
.addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ)
.addOperand(CmpMI->getOperand(1)); // Branch target.
}
CmpMI->eraseFromParent();
Head->updateTerminator();
RemovedBlocks.push_back(CmpBB);
CmpBB->eraseFromParent();
DEBUG(dbgs() << "Result:\n" << *Head);
++NumConverted;
}
int SSACCmpConv::expectedCodeSizeDelta() const {
int delta = 0;
// If the Head terminator was one of the cbz / tbz branches with built-in
// compare, we need to insert an explicit compare instruction in its place
// plus a branch instruction.
if (HeadCond[0].getImm() == -1) {
switch (HeadCond[1].getImm()) {
case ARM64::CBZW:
case ARM64::CBNZW:
case ARM64::CBZX:
case ARM64::CBNZX:
// Therefore delta += 1
delta = 1;
break;
default:
llvm_unreachable("Cannot convert Head branch");
}
}
// If the Cmp terminator was one of the cbz / tbz branches with
// built-in compare, it will be turned into a compare instruction
// in Head, but we do not save any instruction.
// Otherwise, we save the branch instruction.
switch (CmpMI->getOpcode()) {
default:
--delta;
break;
case ARM64::CBZW:
case ARM64::CBNZW:
case ARM64::CBZX:
case ARM64::CBNZX:
break;
}
return delta;
}
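// Worked example (illustrative): if Head ends in "cbz w0, ..." while CmpBB
// holds a separate cmp + b.cond pair, the conversion adds an explicit SUBS for
// the cbz (delta += 1) but removes CmpBB's branch (delta -= 1), so the
// expected size change is zero. If Head already ends in a plain conditional
// branch, only the branch removal applies and the code shrinks by one
// instruction.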
//===----------------------------------------------------------------------===//
// ARM64ConditionalCompares Pass
//===----------------------------------------------------------------------===//
namespace {
class ARM64ConditionalCompares : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const MCSchedModel *SchedModel;
// Whether the function being processed has the MinSize (-Oz) attribute.
bool MinSize;
MachineRegisterInfo *MRI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
SSACCmpConv CmpConv;
public:
static char ID;
ARM64ConditionalCompares() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "ARM64 Conditional Compares"; }
private:
bool tryConvert(MachineBasicBlock *);
void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
void invalidateTraces();
bool shouldConvert();
};
} // end anonymous namespace
char ARM64ConditionalCompares::ID = 0;
namespace llvm {
void initializeARM64ConditionalComparesPass(PassRegistry &);
}
INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
false, false)
FunctionPass *llvm::createARM64ConditionalCompares() {
return new ARM64ConditionalCompares();
}
void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
MachineFunctionPass::getAnalysisUsage(AU);
}
/// Update the dominator tree after if-conversion erased some blocks.
void
ARM64ConditionalCompares::updateDomTree(ArrayRef<MachineBasicBlock *> Removed) {
// convert() removes CmpBB which was previously dominated by Head.
// CmpBB children should be transferred to Head.
MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
assert(Node != HeadNode && "Cannot erase the head node");
assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
while (Node->getNumChildren())
DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
DomTree->eraseNode(Removed[i]);
}
}
/// Update LoopInfo after if-conversion.
void
ARM64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
if (!Loops)
return;
for (unsigned i = 0, e = Removed.size(); i != e; ++i)
Loops->removeBlock(Removed[i]);
}
/// Invalidate MachineTraceMetrics before if-conversion.
void ARM64ConditionalCompares::invalidateTraces() {
Traces->invalidate(CmpConv.Head);
Traces->invalidate(CmpConv.CmpBB);
}
/// Apply cost model and heuristics to the if-conversion in IfConv.
/// Return true if the conversion is a good idea.
///
bool ARM64ConditionalCompares::shouldConvert() {
// Stress testing mode disables all cost considerations.
if (Stress)
return true;
if (!MinInstr)
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
// Head dominates CmpBB, so it is always included in its trace.
MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
// If code size is the main concern
if (MinSize) {
int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
// If we are minimizing the code size, do the conversion whatever
// the cost is.
if (CodeSizeDelta < 0)
return true;
if (CodeSizeDelta > 0) {
DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
return false;
}
// CodeSizeDelta == 0, continue with the regular heuristics
}
// Heuristic: The compare conversion delays the execution of the branch
// instruction because we must wait for the inputs to the second compare as
// well. The branch has no dependent instructions, but delaying it increases
// the cost of a misprediction.
//
// Set a limit on the delay we will accept.
unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4;
// Instruction depths can be computed for all trace instructions above CmpBB.
unsigned HeadDepth =
Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth;
unsigned CmpBBDepth =
Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth;
DEBUG(dbgs() << "Head depth: " << HeadDepth
<< "\nCmpBB depth: " << CmpBBDepth << '\n');
if (CmpBBDepth > HeadDepth + DelayLimit) {
DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
<< " cycles.\n");
return false;
}
// Check the resource depth at the bottom of CmpBB - these instructions will
// be speculated.
unsigned ResDepth = Trace.getResourceDepth(true);
DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
// Heuristic: The speculatively executed instructions must all be able to
// merge into the Head block. The Head critical path should dominate the
// resource cost of the speculated instructions.
if (ResDepth > HeadDepth) {
DEBUG(dbgs() << "Too many instructions to speculate.\n");
return false;
}
return true;
}
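// Worked example (illustrative, the numbers are hypothetical): with a machine
// model whose MispredictPenalty is 16 cycles, DelayLimit is 12. If Head's
// terminator sits at depth 5 in the trace and CmpBB's at depth 20, the
// 15-cycle extra delay exceeds the limit and the conversion is rejected; at
// depth 14 it would pass this check and proceed to the resource-depth test.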
bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
bool Changed = false;
while (CmpConv.canConvert(MBB) && shouldConvert()) {
invalidateTraces();
SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
CmpConv.convert(RemovedBlocks);
Changed = true;
updateDomTree(RemovedBlocks);
updateLoops(RemovedBlocks);
}
return Changed;
}
bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
SchedModel =
MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = 0;
MinSize = MF.getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::MinSize);
bool Changed = false;
CmpConv.runOnMachineFunction(MF);
// Visit blocks in dominator tree pre-order. The pre-order enables multiple
// cmp-conversions from the same head block.
// Note that updateDomTree() modifies the children of the DomTree node
// currently being visited. The df_iterator supports that, it doesn't look at
// child_begin() / child_end() until after a node has been visited.
for (df_iterator<MachineDominatorTree *> I = df_begin(DomTree),
E = df_end(DomTree);
I != E; ++I)
if (tryConvert(I->getBlock()))
Changed = true;
return Changed;
}

View File

@ -0,0 +1,104 @@
//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// When allowed by the instruction, replace a dead definition of a GPR with
// the zero register. This makes the code a bit friendlier towards the
// hardware's register renamer.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-dead-defs"
#include "ARM64.h"
#include "ARM64RegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
namespace {
class ARM64DeadRegisterDefinitions : public MachineFunctionPass {
private:
bool processMachineBasicBlock(MachineBasicBlock *MBB);
public:
static char ID; // Pass identification, replacement for typeid.
explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &F);
const char *getPassName() const { return "Dead register definitions"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
char ARM64DeadRegisterDefinitions::ID = 0;
} // end anonymous namespace
bool
ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock *MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I) {
MachineInstr *MI = I;
for (int i = 0, e = MI->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDead() && MO.isDef()) {
assert(!MO.isImplicit() && "Unexpected implicit def!");
DEBUG(dbgs() << " Dead def operand #" << i << " in:\n ";
MI->print(dbgs()));
// Be careful not to change the register if it's a tied operand.
if (MI->isRegTiedToUseOperand(i)) {
DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
continue;
}
// Make sure the instruction takes a register class that contains
// the zero register, and replace the register if so.
unsigned NewReg;
switch (MI->getDesc().OpInfo[i].RegClass) {
default:
DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
case ARM64::GPR32RegClassID:
NewReg = ARM64::WZR;
break;
case ARM64::GPR64RegClassID:
NewReg = ARM64::XZR;
break;
}
DEBUG(dbgs() << " Replacing with zero register. New:\n ");
MO.setReg(NewReg);
DEBUG(MI->print(dbgs()));
++NumDeadDefsReplaced;
Changed = true;
}
}
}
return Changed;
}
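// Illustrative example (not part of the pass): "subs w8, w1, #1" whose w8
// result is never read carries a dead def of w8; rewriting it to
// "subs wzr, w1, #1" (the canonical "cmp w1, #1") keeps the flag update while
// sparing the register renamer a pointless write to w8.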
// Scan the function for instructions that have a dead definition of a
// register. Replace that register with the zero register when possible.
bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &mf) {
MachineFunction *MF = &mf;
bool Changed = false;
DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n");
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
if (processMachineBasicBlock(I))
Changed = true;
return Changed;
}
FunctionPass *llvm::createARM64DeadRegisterDefinitions() {
return new ARM64DeadRegisterDefinitions();
}

View File

@ -0,0 +1,726 @@
//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "ARM64InstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
namespace {
class ARM64ExpandPseudo : public MachineFunctionPass {
public:
static char ID;
ARM64ExpandPseudo() : MachineFunctionPass(ID) {}
const ARM64InstrInfo *TII;
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM64 pseudo instruction expansion pass";
}
private:
bool expandMBB(MachineBasicBlock &MBB);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
};
char ARM64ExpandPseudo::ID = 0;
}
/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
MachineInstrBuilder &DefMI) {
const MCInstrDesc &Desc = OldMI.getDesc();
for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
++i) {
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
UseMI.addOperand(MO);
else
DefMI.addOperand(MO);
}
}
/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
assert(ChunkIdx < 4 && "Out of range chunk index specified!");
return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}
/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
/// value. Indices correspond to element numbers in a v4i16.
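/// For example, replicateChunk(0x0000111122223333ULL, 0, 2) copies chunk 0
/// (0x3333) into position 2, yielding 0x0000333322223333.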
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
const unsigned ShiftAmt = ToIdx * 16;
// Replicate the source chunk to the destination position.
const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
// Clear the destination chunk.
Imm &= ~(0xFFFFLL << ShiftAmt);
// Insert the replicated chunk.
return Imm | Chunk;
}
/// \brief Helper function which tries to materialize a 64-bit value with an
/// ORR + MOVK instruction sequence.
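/// Illustrative example: with UImm = 0x00000FF012340FF0, passing
/// OrrImm = 0x00000FF000000FF0 (a valid logical immediate) and ChunkIdx = 1
/// produces:
///   orr  x0, xzr, #0x00000ff000000ff0
///   movk x0, #0x1234, lsl #16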
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const ARM64InstrInfo *TII, unsigned ChunkIdx) {
assert(ChunkIdx < 4 && "Out of range chunk index specified!");
const unsigned ShiftAmt = ChunkIdx * 16;
uint64_t Encoding;
if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
.addOperand(MI.getOperand(0))
.addReg(ARM64::XZR)
.addImm(Encoding);
// Create the MOVK instruction.
const unsigned Imm16 = getChunk(UImm, ChunkIdx);
const unsigned DstReg = MI.getOperand(0).getReg();
const bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
transferImpOps(MI, MIB, MIB1);
MI.eraseFromParent();
return true;
}
return false;
}
/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
/// can be materialized with an ORR instruction.
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}
/// \brief Check for identical 16-bit chunks within the constant and if so
/// materialize them with a single ORR instruction. The remaining one or two
/// 16-bit chunks will be materialized with MOVK instructions.
///
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
/// an ORR instruction.
///
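/// Illustrative example: 0x00FF123400FF5678 contains the chunk 0x00FF twice,
/// and 0x00FF replicated to all four chunks is a valid logical immediate, so
/// the constant expands to:
///   orr  x0, xzr, #0x00ff00ff00ff00ff
///   movk x0, #0x5678
///   movk x0, #0x1234, lsl #32
///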
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const ARM64InstrInfo *TII) {
typedef DenseMap<uint64_t, unsigned> CountMap;
CountMap Counts;
// Scan the constant and count how often every chunk occurs.
for (unsigned Idx = 0; Idx < 4; ++Idx)
++Counts[getChunk(UImm, Idx)];
// Traverse the chunks to find one which occurs more than once.
for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
Chunk != End; ++Chunk) {
const uint64_t ChunkVal = Chunk->first;
const unsigned Count = Chunk->second;
uint64_t Encoding = 0;
// We are looking for chunks which have two or three instances and can be
// materialized with an ORR instruction.
if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
continue;
const bool CountThree = Count == 3;
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
.addOperand(MI.getOperand(0))
.addReg(ARM64::XZR)
.addImm(Encoding);
const unsigned DstReg = MI.getOperand(0).getReg();
const bool DstIsDead = MI.getOperand(0).isDead();
unsigned ShiftAmt = 0;
uint64_t Imm16 = 0;
// Find the first chunk not materialized with the ORR instruction.
for (; ShiftAmt < 64; ShiftAmt += 16) {
Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
if (Imm16 != ChunkVal)
break;
}
// Create the first MOVK instruction.
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg,
RegState::Define | getDeadRegState(DstIsDead && CountThree))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
// In case we have three instances the whole constant is now materialized
// and we can exit.
if (CountThree) {
transferImpOps(MI, MIB, MIB1);
MI.eraseFromParent();
return true;
}
// Find the remaining chunk which needs to be materialized.
for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
if (Imm16 != ChunkVal)
break;
}
// Create the second MOVK instruction.
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
transferImpOps(MI, MIB, MIB2);
MI.eraseFromParent();
return true;
}
return false;
}
/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
/// starts a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
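/// For example, the sign-extended chunk 0xFFFFFFFFFFFFFF00 (from 0xFF00) is a
/// start chunk, while 0x0000000000007F00 is not.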
static bool isStartChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
}
/// \brief Check whether this chunk matches the pattern '0...1...'. This pattern
/// ends a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
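/// For example, the sign-extended chunk 0x00000000000000FF (from 0x00FF) is an
/// end chunk, while 0x00000000000000F0 is not.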
static bool isEndChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
}
/// \brief Clear or set all bits in the chunk at the given index.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
const uint64_t Mask = 0xFFFF;
if (Clear)
// Clear chunk in the immediate.
Imm &= ~(Mask << (Idx * 16));
else
// Set all bits in the immediate for the particular chunk.
Imm |= Mask << (Idx * 16);
return Imm;
}
/// \brief Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction.
/// Materialize the chunks which are either interrupting the sequence or outside
/// of the sequence with a MOVK instruction.
///
/// Assume S is a chunk which starts the sequence (1...0...) and E is a chunk
/// which ends the sequence (0...1...). Then we are looking for constants which
/// contain at least one S chunk and one E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
///
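/// Illustrative example: 0x00FF1234FFFFFF00 has the start chunk 0xFF00 and
/// the end chunk 0x00FF; patching the interrupting chunk 0x1234 yields the
/// contiguous sequence 0x00FFFFFFFFFFFF00, so the constant expands to:
///   orr  x0, xzr, #0x00ffffffffffff00
///   movk x0, #0x1234, lsl #32
///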
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const ARM64InstrInfo *TII) {
const int NotSet = -1;
const uint64_t Mask = 0xFFFF;
int StartIdx = NotSet;
int EndIdx = NotSet;
// Try to find the chunks which start/end a contiguous sequence of ones.
for (int Idx = 0; Idx < 4; ++Idx) {
int64_t Chunk = getChunk(UImm, Idx);
// Sign extend the 16-bit chunk to 64-bit.
Chunk = (Chunk << 48) >> 48;
if (isStartChunk(Chunk))
StartIdx = Idx;
else if (isEndChunk(Chunk))
EndIdx = Idx;
}
// Early exit in case we can't find a start/end chunk.
if (StartIdx == NotSet || EndIdx == NotSet)
return false;
// Outside of the contiguous sequence of ones everything needs to be zero.
uint64_t Outside = 0;
// Chunks between the start and end chunk need to have all their bits set.
uint64_t Inside = Mask;
// If our contiguous sequence of ones wraps around from the MSB into the LSB,
// just swap indices and pretend we are materializing a contiguous sequence
// of zeros surrounded by a contiguous sequence of ones.
if (StartIdx > EndIdx) {
std::swap(StartIdx, EndIdx);
std::swap(Outside, Inside);
}
uint64_t OrrImm = UImm;
int FirstMovkIdx = NotSet;
int SecondMovkIdx = NotSet;
// Find out which chunks we need to patch up to obtain a contiguous sequence
// of ones.
for (int Idx = 0; Idx < 4; ++Idx) {
const uint64_t Chunk = getChunk(UImm, Idx);
// Check whether we are looking at a chunk which is not part of the
// contiguous sequence of ones.
if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
OrrImm = updateImm(OrrImm, Idx, Outside == 0);
// Remember the index we need to patch.
if (FirstMovkIdx == NotSet)
FirstMovkIdx = Idx;
else
SecondMovkIdx = Idx;
// Check whether we are looking at a chunk which is part of the contiguous
// sequence of ones.
} else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
// Remember the index we need to patch.
if (FirstMovkIdx == NotSet)
FirstMovkIdx = Idx;
else
SecondMovkIdx = Idx;
}
}
assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
// Create the ORR-immediate instruction.
uint64_t Encoding = 0;
ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
.addOperand(MI.getOperand(0))
.addReg(ARM64::XZR)
.addImm(Encoding);
const unsigned DstReg = MI.getOperand(0).getReg();
const bool DstIsDead = MI.getOperand(0).isDead();
const bool SingleMovk = SecondMovkIdx == NotSet;
// Create the first MOVK instruction.
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg,
RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
.addReg(DstReg)
.addImm(getChunk(UImm, FirstMovkIdx))
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16));
// Early exit in case we only need to emit a single MOVK instruction.
if (SingleMovk) {
transferImpOps(MI, MIB, MIB1);
MI.eraseFromParent();
return true;
}
// Create the second MOVK instruction.
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(getChunk(UImm, SecondMovkIdx))
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16));
transferImpOps(MI, MIB, MIB2);
MI.eraseFromParent();
return true;
}
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned BitSize) {
MachineInstr &MI = *MBBI;
uint64_t Imm = MI.getOperand(1).getImm();
const unsigned Mask = 0xFFFF;
// Try a MOVI instruction (aka ORR-immediate with the zero register).
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
uint64_t Encoding;
if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addOperand(MI.getOperand(0))
.addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR)
.addImm(Encoding);
transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
// Scan the immediate and count the number of 16-bit chunks which are either
// all ones or all zeros.
unsigned OneChunks = 0;
unsigned ZeroChunks = 0;
for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
const unsigned Chunk = (Imm >> Shift) & Mask;
if (Chunk == Mask)
OneChunks++;
else if (Chunk == 0)
ZeroChunks++;
}
// Since we can't materialize the constant with a single ORR instruction,
// let's see whether we can materialize 3/4 of the constant with an ORR
// instruction and use an additional MOVK instruction to materialize the
// remaining 1/4.
//
// We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
//
// E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
// we would create the following instruction sequence:
//
// ORR x0, xzr, |A|X|A|X|
// MOVK x0, |B|, LSL #16
//
// Only look at 64-bit constants which can't be materialized with a single
// instruction, i.e. which have fewer than three all-zero and fewer than
// three all-one chunks.
//
// Ignore 32-bit constants here; they can always be materialized with a
// MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
// with a single ORR, the best sequence we can achieve is an ORR + MOVK pair.
// Thus we fall back to the default code below which in the best case creates
// a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
//
if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
// If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
// identical?
if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
// See if we can come up with a constant which can be materialized with
// ORR-immediate by replicating element 3 into element 1.
uint64_t OrrImm = replicateChunk(UImm, 3, 1);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
return true;
// See if we can come up with a constant which can be materialized with
// ORR-immediate by replicating element 1 into element 3.
OrrImm = replicateChunk(UImm, 1, 3);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
return true;
// If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
// identical?
} else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
// See if we can come up with a constant which can be materialized with
// ORR-immediate by replicating element 2 into element 0.
uint64_t OrrImm = replicateChunk(UImm, 2, 0);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
return true;
// See if we can come up with a constant which can be materialized with
// ORR-immediate by replicating element 0 into element 2.
OrrImm = replicateChunk(UImm, 0, 2);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
return true;
}
}
// Check for identical 16-bit chunks within the constant and if so materialize
// them with a single ORR instruction. The remaining one or two 16-bit chunks
// will be materialized with MOVK instructions.
if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
return true;
// Check whether the constant contains a sequence of contiguous ones, which
// might be interrupted by one or two chunks. If so, materialize the sequence
// of contiguous ones with an ORR instruction. Materialize the chunks which
// are either interrupting the sequence or outside of the sequence with a
// MOVK instruction.
if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
return true;
// Use a MOVZ or MOVN instruction to set the high bits, followed by one or
// more MOVK instructions to insert additional 16-bit portions into the
// lower bits.
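// Illustrative example: 0x000000010000ABCD has its highest set bit in chunk 2
// and a non-zero chunk 0, so it becomes:
//   movz x0, #0x1, lsl #32
//   movk x0, #0xabcd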
bool isNeg = false;
// Use MOVN to materialize the high bits if we have more all one chunks
// than all zero chunks.
if (OneChunks > ZeroChunks) {
isNeg = true;
Imm = ~Imm;
}
unsigned FirstOpc;
if (BitSize == 32) {
Imm &= (1LL << 32) - 1;
FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi);
} else {
FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi);
}
unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
unsigned LastShift = 0; // LSL amount for last MOVK
if (Imm != 0) {
unsigned LZ = countLeadingZeros(Imm);
unsigned TZ = countTrailingZeros(Imm);
Shift = ((63 - LZ) / 16) * 16;
LastShift = (TZ / 16) * 16;
}
unsigned Imm16 = (Imm >> Shift) & Mask;
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
.addReg(DstReg, RegState::Define |
getDeadRegState(DstIsDead && Shift == LastShift))
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
// If a MOVN was used for the high bits of a negative value, flip the rest
// of the bits back for use with MOVK.
if (isNeg)
Imm = ~Imm;
if (Shift == LastShift) {
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
return true;
}
MachineInstrBuilder MIB2;
unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi);
while (Shift != LastShift) {
Shift -= 16;
Imm16 = (Imm >> Shift) & Mask;
if (Imm16 == (isNeg ? Mask : 0))
continue; // This 16-bit portion is already set correctly.
MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addReg(DstReg,
RegState::Define |
getDeadRegState(DstIsDead && Shift == LastShift))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
}
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default:
break;
case ARM64::ADDWrr:
case ARM64::SUBWrr:
case ARM64::ADDXrr:
case ARM64::SUBXrr:
case ARM64::ADDSWrr:
case ARM64::SUBSWrr:
case ARM64::ADDSXrr:
case ARM64::SUBSXrr:
case ARM64::ANDWrr:
case ARM64::ANDXrr:
case ARM64::BICWrr:
case ARM64::BICXrr:
case ARM64::EONWrr:
case ARM64::EONXrr:
case ARM64::EORWrr:
case ARM64::EORXrr:
case ARM64::ORNWrr:
case ARM64::ORNXrr:
case ARM64::ORRWrr:
case ARM64::ORRXrr: {
unsigned Opcode;
switch (MI.getOpcode()) {
default:
return false;
case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break;
case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break;
case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break;
case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break;
case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break;
case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break;
case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break;
case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break;
case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break;
case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break;
case ARM64::BICWrr: Opcode = ARM64::BICWrs; break;
case ARM64::BICXrr: Opcode = ARM64::BICXrs; break;
case ARM64::EONWrr: Opcode = ARM64::EONWrs; break;
case ARM64::EONXrr: Opcode = ARM64::EONXrs; break;
case ARM64::EORWrr: Opcode = ARM64::EORWrs; break;
case ARM64::EORXrr: Opcode = ARM64::EORXrs; break;
case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break;
case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break;
case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break;
case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break;
}
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(2))
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
return true;
}
case ARM64::LOADgot: {
// Expand into ADRP + LDR.
unsigned DstReg = MI.getOperand(0).getReg();
const MachineOperand &MO1 = MI.getOperand(1);
unsigned Flags = MO1.getTargetFlags();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui))
.addOperand(MI.getOperand(0))
.addReg(DstReg);
if (MO1.isGlobal()) {
MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE);
MIB2.addGlobalAddress(MO1.getGlobal(), 0,
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
} else if (MO1.isSymbol()) {
MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE);
MIB2.addExternalSymbol(MO1.getSymbolName(),
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
} else {
assert(MO1.isCPI() &&
"Only expect globals, externalsymbols, or constant pools");
MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
Flags | ARM64II::MO_PAGE);
MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
}
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
case ARM64::MOVaddr:
case ARM64::MOVaddrJT:
case ARM64::MOVaddrCP:
case ARM64::MOVaddrBA:
case ARM64::MOVaddrTLS:
case ARM64::MOVaddrEXT: {
// Expand into ADRP + ADD.
unsigned DstReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg)
.addOperand(MI.getOperand(1));
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri))
.addOperand(MI.getOperand(0))
.addReg(DstReg)
.addOperand(MI.getOperand(2))
.addImm(0);
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
case ARM64::MOVi32imm:
return expandMOVImm(MBB, MBBI, 32);
case ARM64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
case ARM64::RET_ReallyLR:
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET))
.addReg(ARM64::LR);
MI.eraseFromParent();
return true;
}
return false;
}
/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineBasicBlock::iterator NMBBI = std::next(MBBI);
Modified |= expandMI(MBB, MBBI);
MBBI = NMBBI;
}
return Modified;
}
bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
bool Modified = false;
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
++MFI)
Modified |= expandMBB(*MFI);
return Modified;
}
/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createARM64ExpandPseudoPass() {
return new ARM64ExpandPseudo();
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,818 @@
//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM64 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "frame-info"
#include "ARM64FrameLowering.h"
#include "ARM64InstrInfo.h"
#include "ARM64MachineFunctionInfo.h"
#include "ARM64Subtarget.h"
#include "ARM64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<bool> EnableRedZone("arm64-redzone",
cl::desc("enable use of redzone on ARM64"),
cl::init(false), cl::Hidden);
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
static unsigned estimateStackSize(MachineFunction &MF) {
const MachineFrameInfo *FFI = MF.getFrameInfo();
int Offset = 0;
for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
int FixedOff = -FFI->getObjectOffset(i);
if (FixedOff > Offset)
Offset = FixedOff;
}
for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
if (FFI->isDeadObjectIndex(i))
continue;
Offset += FFI->getObjectSize(i);
unsigned Align = FFI->getObjectAlignment(i);
// Adjust to alignment boundary
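// (e.g. an Offset of 20 with Align 8 rounds up to 24)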
Offset = (Offset + Align - 1) / Align * Align;
}
// This does not include the 16 bytes used for fp and lr.
return (unsigned)Offset;
}
bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
if (MF.getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::NoRedZone))
return false;
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
unsigned NumBytes = AFI->getLocalStackSize();
// Note: currently hasFP() is always true for hasCalls(), but that's an
// implementation detail of the current code, not a strict requirement,
// so stay safe here and check both.
if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
return false;
return true;
}
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
assert(!RegInfo->needsStackRealignment(MF) &&
"No stack realignment on ARM64!");
#endif
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
void ARM64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
const ARM64InstrInfo *TII =
static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
if (!TFI->hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr *Old = I;
DebugLoc DL = Old->getDebugLoc();
unsigned Amount = Old->getOperand(0).getImm();
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
unsigned Align = TFI->getStackAlignment();
Amount = (Amount + Align - 1) / Align * Align;
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old->getOpcode();
if (Opc == ARM64::ADJCALLSTACKDOWN) {
emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII);
} else {
assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP");
emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII);
}
}
}
MBB.erase(I);
}
void
ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned FramePtr) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
const ARM64InstrInfo *TII = TM.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
if (CSI.empty())
return;
const DataLayout *TD = MF.getTarget().getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
int stackGrowth = -TD->getPointerSize(0);
// Calculate offsets.
int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
unsigned TotalSkipped = 0;
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end();
I != E; ++I) {
unsigned Reg = I->getReg();
int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()) -
getOffsetOfLocalArea() + saveAreaOffset;
// Don't output a new CFI directive if we're re-saving the frame pointer or
// link register. This happens when the PrologEpilogInserter has inserted an
// extra "STP" of the frame pointer and link register -- the "emitPrologue"
// method automatically generates the directives when frame pointers are
// used. If we generate CFI directives for the extra "STP"s, the linker will
// lose track of the correct values for the frame pointer and link register.
if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) {
TotalSkipped += stackGrowth;
continue;
}
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, DwarfReg, Offset - TotalSkipped));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
}
void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo();
const ARM64InstrInfo *TII = TM.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
bool HasFP = hasFP(MF);
DebugLoc DL = MBB.findDebugLoc(MBBI);
int NumBytes = (int)MFI->getStackSize();
if (!AFI->hasStackFrame()) {
assert(!HasFP && "unexpected function without stack frame but with FP");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
// REDZONE: If the stack size is less than 128 bytes, we don't need
// to actually allocate.
if (NumBytes && !canUseRedZone(MF)) {
emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
// Encode the stack size of the leaf function.
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
} else if (NumBytes) {
++NumRedZoneFunctions;
}
return;
}
// Only set up FP if we actually need to.
int FPOffset = 0;
if (HasFP) {
// First instruction must a) allocate the stack and b) have an immediate
// that is negative and a multiple of 2.
assert((MBBI->getOpcode() == ARM64::STPXpre ||
MBBI->getOpcode() == ARM64::STPDpre) &&
MBBI->getOperand(2).getReg() == ARM64::SP &&
MBBI->getOperand(3).getImm() < 0 &&
(MBBI->getOperand(3).getImm() & 1) == 0);
// Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
// required for the callee saved register area, we get the frame pointer
// by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
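// For example, with two callee-saved register pairs the pre-increment store
// has getImm() == -4, giving FPOffset = -(-4 + 2) * 8 = 16.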
FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8;
assert(FPOffset >= 0 && "Bad Framepointer Offset");
}
// Move past the saves of the callee-saved registers.
while (MBBI->getOpcode() == ARM64::STPXi ||
MBBI->getOpcode() == ARM64::STPDi ||
MBBI->getOpcode() == ARM64::STPXpre ||
MBBI->getOpcode() == ARM64::STPDpre) {
++MBBI;
NumBytes -= 16;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
if (HasFP) {
// Issue sub fp, sp, FPOffset or
// mov fp,sp when FPOffset is zero.
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII,
MachineInstr::FrameSetup);
}
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes);
// Allocate space for the rest of the frame.
if (NumBytes) {
// If we're a leaf function, try using the red zone.
if (!canUseRedZone(MF))
emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
}
// If we need a base pointer, set it up here. It's whatever the value of the
// stack pointer is at this point. Any variable size objects will be allocated
// after this, so we can still use the base pointer to reference locals.
//
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
//
if (RegInfo->hasBasePointer(MF))
TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false);
if (needsFrameMoves) {
const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
// .globl __foo
// .align 2
// __foo:
// Ltmp0:
// .cfi_startproc
// .cfi_personality 155, ___gxx_personality_v0
// Leh_func_begin:
// .cfi_lsda 16, Lexception33
//
// stp xa,bx, [sp, -#offset]!
// ...
// stp x28, x27, [sp, #offset-32]
// stp fp, lr, [sp, #offset-16]
// add fp, sp, #offset - 16
// sub sp, sp, #1360
//
// The Stack:
// +-------------------------------------------+
// 10000 | ........ | ........ | ........ | ........ |
// 10004 | ........ | ........ | ........ | ........ |
// +-------------------------------------------+
// 10008 | ........ | ........ | ........ | ........ |
// 1000c | ........ | ........ | ........ | ........ |
// +===========================================+
// 10010 | X28 Register |
// 10014 | X28 Register |
// +-------------------------------------------+
// 10018 | X27 Register |
// 1001c | X27 Register |
// +===========================================+
// 10020 | Frame Pointer |
// 10024 | Frame Pointer |
// +-------------------------------------------+
// 10028 | Link Register |
// 1002c | Link Register |
// +===========================================+
// 10030 | ........ | ........ | ........ | ........ |
// 10034 | ........ | ........ | ........ | ........ |
// +-------------------------------------------+
// 10038 | ........ | ........ | ........ | ........ |
// 1003c | ........ | ........ | ........ | ........ |
// +-------------------------------------------+
//
// [sp] = 10030 :: >>initial value<<
// sp = 10020 :: stp fp, lr, [sp, #-16]!
// fp = sp == 10020 :: mov fp, sp
// [sp] == 10020 :: stp x28, x27, [sp, #-16]!
// sp == 10010 :: >>final value<<
//
// The frame pointer (w29) points to address 10020. If we use an offset of
// '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
// for w27, and -32 for w28:
//
// Ltmp1:
// .cfi_def_cfa w29, 16
// Ltmp2:
// .cfi_offset w30, -8
// Ltmp3:
// .cfi_offset w29, -16
// Ltmp4:
// .cfi_offset w27, -24
// Ltmp5:
// .cfi_offset w28, -32
if (HasFP) {
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// Record the location of the stored LR
unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// Record the location of the stored FP
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
} else {
// Encode the stack size of the leaf function.
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
// Now emit the moves for whatever callee saved regs we have.
emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
}
}
static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
if (Reg == CSRegs[i])
return true;
return false;
}
static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
if (MI->getOpcode() == ARM64::LDPXpost ||
MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi ||
MI->getOpcode() == ARM64::LDPDi) {
if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) ||
!isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) ||
MI->getOperand(2).getReg() != ARM64::SP)
return false;
return true;
}
return false;
}
void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
MachineFrameInfo *MFI = MF.getFrameInfo();
const ARM64InstrInfo *TII =
static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
const ARM64RegisterInfo *RegInfo =
static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
DebugLoc DL = MBBI->getDebugLoc();
unsigned NumBytes = MFI->getStackSize();
unsigned NumRestores = 0;
// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBBI;
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (LastPopI != MBB.begin()) {
do {
++NumRestores;
--LastPopI;
} while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
if (!isCSRestore(LastPopI, CSRegs)) {
++LastPopI;
--NumRestores;
}
}
NumBytes -= NumRestores * 16;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
if (!hasFP(MF)) {
// If this was a redzone leaf function, we don't need to restore the
// stack pointer.
if (!canUseRedZone(MF))
emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII);
return;
}
// Restore the original stack pointer.
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (NumBytes || MFI->hasVarSizedObjects())
emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP,
-(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
}
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index.
int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
unsigned FrameReg;
return getFrameIndexReference(MF, FI, FrameReg);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
return resolveFrameIndexReference(MF, FI, FrameReg);
}
int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg,
bool PreferFP) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARM64RegisterInfo *RegInfo =
static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
int FPOffset = MFI->getObjectOffset(FI) + 16;
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
bool isFixed = MFI->isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs (and thus the SP isn't reliable as a base).
// Make sure useFPForScavengingIndex() does the right thing for the emergency
// spill slot.
bool UseFP = false;
if (AFI->hasStackFrame()) {
// Note: Keeping the following as multiple 'if' statements rather than
// merging to a single expression for readability.
//
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
} else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
// If the FPOffset is negative, we have to keep in mind that the
// available offset range for negative offsets is smaller than for
// positive ones. If we have variable sized objects, we're stuck with
// using the FP regardless, though, as the SP offset is unknown
// and we don't have a base pointer available. If an offset is
// available via the FP and the SP, use whichever is closest.
if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
(FPOffset >= -256 && Offset > -FPOffset))
UseFP = true;
}
}
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
// Use the base pointer if we have one.
if (RegInfo->hasBasePointer(MF))
FrameReg = RegInfo->getBaseRegister();
else {
FrameReg = ARM64::SP;
// If we're using the red zone for this function, the SP won't actually
// be adjusted, so the offsets will be negative. They're also all
// within range of the signed 9-bit immediate instructions.
if (canUseRedZone(MF))
Offset -= AFI->getLocalStackSize();
}
return Offset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
if (Reg != ARM64::LR)
return getKillRegState(true);
// LR may be referred to later by an @llvm.returnaddress intrinsic.
bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR);
bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
return getKillRegState(LRKill);
}
bool ARM64FrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
unsigned Count = CSI.size();
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
if (MI != MBB.end())
DL = MI->getDebugLoc();
for (unsigned i = 0; i < Count; i += 2) {
unsigned idx = Count - i - 2;
unsigned Reg1 = CSI[idx].getReg();
unsigned Reg2 = CSI[idx + 1].getReg();
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
// pair instructions directly. Assert if we see anything otherwise.
//
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
"Out of order callee saved regs!");
unsigned StrOpc;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
// Issue sequence of non-sp increment and pi sp spills for cs regs. The
// first spill is a pre-increment that allocates the stack.
// For example:
// stp x22, x21, [sp, #-48]! // addImm(-6)
// stp x20, x19, [sp, #16] // addImm(+2)
// stp fp, lr, [sp, #32] // addImm(+4)
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
if (ARM64::GPR64RegClass.contains(Reg1)) {
assert(ARM64::GPR64RegClass.contains(Reg2) &&
"Expected GPR64 callee-saved register pair!");
// For first spill use pre-increment store.
if (i == 0)
StrOpc = ARM64::STPXpre;
else
StrOpc = ARM64::STPXi;
} else if (ARM64::FPR64RegClass.contains(Reg1)) {
assert(ARM64::FPR64RegClass.contains(Reg2) &&
"Expected FPR64 callee-saved register pair!");
// For first spill use pre-increment store.
if (i == 0)
StrOpc = ARM64::STPDpre;
else
StrOpc = ARM64::STPDi;
} else
llvm_unreachable("Unexpected callee saved register!");
DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
<< TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
<< ", " << CSI[idx + 1].getFrameIdx() << ")\n");
// Compute offset: i = 0 => offset = -Count;
// i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
const int Offset = (i == 0) ? -Count : i;
assert((Offset >= -64 && Offset <= 63) &&
"Offset out of bounds for STP immediate");
BuildMI(MBB, MI, DL, TII.get(StrOpc))
.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(ARM64::SP)
.addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
.setMIFlag(MachineInstr::FrameSetup);
}
return true;
}
bool ARM64FrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
unsigned Count = CSI.size();
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
if (MI != MBB.end())
DL = MI->getDebugLoc();
for (unsigned i = 0; i < Count; i += 2) {
unsigned Reg1 = CSI[i].getReg();
unsigned Reg2 = CSI[i + 1].getReg();
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
// pair instructions directly. Assert if we see anything otherwise.
assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
"Out of order callee saved regs!");
// Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
// the last load is sp-pi post-increment and de-allocates the stack:
// For example:
// ldp fp, lr, [sp, #32] // addImm(+4)
// ldp x20, x19, [sp, #16] // addImm(+2)
// ldp x22, x21, [sp], #48 // addImm(+6)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
if (ARM64::GPR64RegClass.contains(Reg1)) {
assert(ARM64::GPR64RegClass.contains(Reg2) &&
"Expected GPR64 callee-saved register pair!");
if (i == Count - 2)
LdrOpc = ARM64::LDPXpost;
else
LdrOpc = ARM64::LDPXi;
} else if (ARM64::FPR64RegClass.contains(Reg1)) {
assert(ARM64::FPR64RegClass.contains(Reg2) &&
"Expected FPR64 callee-saved register pair!");
if (i == Count - 2)
LdrOpc = ARM64::LDPDpost;
else
LdrOpc = ARM64::LDPDi;
} else
llvm_unreachable("Unexpected callee saved register!");
DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
<< TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
<< ", " << CSI[i + 1].getFrameIdx() << ")\n");
// Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
// etc.
const int Offset = (i == Count - 2) ? Count : Count - i - 2;
assert((Offset >= -64 && Offset <= 63) &&
"Offset out of bounds for LDP immediate");
BuildMI(MBB, MI, DL, TII.get(LdrOpc))
.addReg(Reg2, getDefRegState(true))
.addReg(Reg1, getDefRegState(true))
.addReg(ARM64::SP)
.addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
// where the factor * 8 is implicit
}
return true;
}
void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan(
MachineFunction &MF, RegScavenger *RS) const {
const ARM64RegisterInfo *RegInfo =
static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
MachineRegisterInfo *MRI = &MF.getRegInfo();
SmallVector<unsigned, 4> UnspilledCSGPRs;
SmallVector<unsigned, 4> UnspilledCSFPRs;
// The frame record needs to be created by saving the appropriate registers
if (hasFP(MF)) {
MRI->setPhysRegUsed(ARM64::FP);
MRI->setPhysRegUsed(ARM64::LR);
}
// Spill the BasePtr if it's used. Do this first thing so that the
// getCalleeSavedRegs() below will get the right answer.
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
bool ExtraCSSpill = false;
bool CanEliminateFrame = true;
DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Check pairs of consecutive callee-saved registers.
for (unsigned i = 0; CSRegs[i]; i += 2) {
assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
const unsigned OddReg = CSRegs[i];
const unsigned EvenReg = CSRegs[i + 1];
assert((ARM64::GPR64RegClass.contains(OddReg) &&
ARM64::GPR64RegClass.contains(EvenReg)) ^
(ARM64::FPR64RegClass.contains(OddReg) &&
ARM64::FPR64RegClass.contains(EvenReg)) &&
"Register class mismatch!");
const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
// Early exit if none of the registers in the register pair is actually
// used.
if (!OddRegUsed && !EvenRegUsed) {
if (ARM64::GPR64RegClass.contains(OddReg)) {
UnspilledCSGPRs.push_back(OddReg);
UnspilledCSGPRs.push_back(EvenReg);
} else {
UnspilledCSFPRs.push_back(OddReg);
UnspilledCSFPRs.push_back(EvenReg);
}
continue;
}
unsigned Reg = ARM64::NoRegister;
// If only one of the registers of the register pair is used, make sure to
// mark the other one as used as well.
if (OddRegUsed ^ EvenRegUsed) {
// Find out which register is the additional spill.
Reg = OddRegUsed ? EvenReg : OddReg;
MRI->setPhysRegUsed(Reg);
}
DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) ||
(RegInfo->getEncodingValue(OddReg) + 1 ==
RegInfo->getEncodingValue(EvenReg))) &&
"Register pair of non-adjacent registers!");
if (ARM64::GPR64RegClass.contains(OddReg)) {
NumGPRSpilled += 2;
// If it's not a reserved register, we can use it in lieu of an
// emergency spill slot for the register scavenger.
// FIXME: It would be better to instead keep looking and choose another
// unspilled register that isn't reserved, if there is one.
if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
ExtraCSSpill = true;
} else
NumFPRSpilled += 2;
CanEliminateFrame = false;
}
// FIXME: Set BigStack if any stack slot references may be out of range.
// For now, just conservatively guestimate based on unscaled indexing
// range. We'll end up allocating an unnecessary spill slot a lot, but
// realistically that's not a big deal at this stage of the game.
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
// Estimate if we might need to scavenge a register at some point in order
// to materialize a stack offset. If so, either spill one additional
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. If we already spilled an extra callee-saved register
// above to keep the number of spills even, we don't need to do anything else
// here.
if (BigStack && !ExtraCSSpill) {
// If we're adding a register to spill here, we have to add two of them
// to keep the number of regs to spill even.
assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
unsigned Count = 0;
while (!UnspilledCSGPRs.empty() && Count < 2) {
unsigned Reg = UnspilledCSGPRs.back();
UnspilledCSGPRs.pop_back();
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
<< " to get a scratch register.\n");
MRI->setPhysRegUsed(Reg);
ExtraCSSpill = true;
++Count;
}
// If we didn't find an extra callee-saved register to spill, create
// an emergency spill slot.
if (!ExtraCSSpill) {
const TargetRegisterClass *RC = &ARM64::GPR64RegClass;
int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
RS->addScavengingFrameIndex(FI);
DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
}
}
}

View File

@@ -0,0 +1,75 @@
//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//
//===----------------------------------------------------------------------===//
#ifndef ARM64_FRAMELOWERING_H
#define ARM64_FRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
class ARM64Subtarget;
class ARM64TargetMachine;
class ARM64FrameLowering : public TargetFrameLowering {
const ARM64TargetMachine &TM;
public:
explicit ARM64FrameLowering(const ARM64TargetMachine &TM,
const ARM64Subtarget &STI)
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
false /*StackRealignable*/),
TM(TM) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned FramePtr) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
int resolveFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg,
bool PreferFP = false) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
/// \brief Can this function use the red zone for local allocations?
bool canUseRedZone(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
};
} // End llvm namespace
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,423 @@
//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H
#define LLVM_TARGET_ARM64_ISELLOWERING_H
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace ARM64ISD {
enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
// Almost the same as a normal call node, except that a TLSDesc relocation is
// needed so the linker can relax it correctly if possible.
TLSDESC_CALL,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
// Offset Table, TLS record).
RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
BRCOND, // Conditional branch instruction; "b.cond".
CSEL,
FCSEL, // Conditional move instruction.
CSINV, // Conditional select invert.
CSNEG, // Conditional select negate.
CSINC, // Conditional select increment.
// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
// ELF.
THREAD_POINTER,
ADC,
SBC, // adc, sbc instructions
// Arithmetic instructions which write flags.
ADDS,
SUBS,
ADCS,
SBCS,
ANDS,
// Floating point comparison
FCMP,
// Floating point max and min instructions.
FMAX,
FMIN,
// Scalar extract
EXTR,
// Scalar-to-vector duplication
DUP,
DUPLANE8,
DUPLANE16,
DUPLANE32,
DUPLANE64,
// Vector immediate moves
MOVI,
MOVIshift,
MOVIedit,
MOVImsl,
FMOV,
MVNIshift,
MVNImsl,
// Vector immediate ops
BICi,
ORRi,
// Vector arithmetic negation
NEG,
// Vector shuffles
ZIP1,
ZIP2,
UZP1,
UZP2,
TRN1,
TRN2,
REV16,
REV32,
REV64,
EXT,
// Vector shift by scalar
VSHL,
VLSHR,
VASHR,
// Vector shift by scalar (again)
SQSHL_I,
UQSHL_I,
SQSHLU_I,
SRSHR_I,
URSHR_I,
// Vector comparisons
CMEQ,
CMGE,
CMGT,
CMHI,
CMHS,
FCMEQ,
FCMGE,
FCMGT,
// Vector zero comparisons
CMEQz,
CMGEz,
CMGTz,
CMLEz,
CMLTz,
FCMEQz,
FCMGEz,
FCMGTz,
FCMLEz,
FCMLTz,
// Vector bitwise negation
NOT,
// Vector bitwise selection
BIT,
// Compare-and-branch
CBZ,
CBNZ,
TBZ,
TBNZ,
// Tail calls
TC_RETURN,
// Custom prefetch handling
PREFETCH,
// {s|u}int to FP within a FP register.
SITOF,
UITOF
};
} // end namespace ARM64ISD
class ARM64Subtarget;
class ARM64TargetMachine;
class ARM64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
explicit ARM64TargetLowering(ARM64TargetMachine &TM);
/// Selects the correct CCAssignFn for the given CallingConvention
/// value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero,
APInt &KnownOne, const SelectionDAG &DAG,
unsigned Depth = 0) const;
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
bool *Fast = 0) const {
if (RequireStrictAlign)
return false;
// FIXME: True for Cyclone, but not necessarily others.
if (Fast)
*Fast = true;
return true;
}
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual const char *getTargetNodeName(unsigned Opcode) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
virtual unsigned getMaximalGlobalOffset() const;
/// Returns true if a cast between SrcAS and DestAS is a noop.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
// Addrspacecasts are always noops.
return true;
}
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
/// isShuffleMaskLegal - Return true if the given shuffle mask can be
/// codegen'd directly, or if it should be stack expanded.
virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size) const;
MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned BinOpcodeLo,
unsigned BinOpcodeHi) const;
MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned CondCode) const;
MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
MachineBasicBlock *BB) const;
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
virtual bool isZExtFree(SDValue Val, EVT VT2) const;
virtual bool hasPairedLoad(Type *LoadedType,
unsigned &RequiredAlignment) const;
virtual bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAlignment) const;
virtual bool isLegalAddImmediate(int64_t) const;
virtual bool isLegalICmpImmediate(int64_t) const;
virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
unsigned SrcAlign, bool IsMemset,
bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
/// \brief Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
virtual int getScalingFactorCost(const AddrMode &AM, Type *Ty) const;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
/// expanded to fmul + fadd.
virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
virtual const uint16_t *getScratchRegisters(CallingConv::ID CC) const;
virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const;
private:
/// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const ARM64Subtarget *Subtarget;
void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
virtual SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue LowerCall(CallLoweringInfo & /*CLI*/,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
bool isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
bool isCalleeStructRet, bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
SDValue &Chain) const;
virtual bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
bool mayBeEmittedAsTailCall(CallInst *CI) const;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, bool &IsInc,
SelectionDAG &DAG) const;
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const;
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
};
namespace ARM64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
} // end namespace ARM64
} // end namespace llvm
#endif // LLVM_TARGET_ARM64_ISELLOWERING_H

View File

@ -0,0 +1,293 @@
//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// ARM64 Atomic operand code-gen constructs.
//
//===----------------------------------------------------------------------===//
//===----------------------------------
// Atomic fences
//===----------------------------------
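// An acquire-only fence (AtomicOrdering value 4, i.e. Acquire) only needs to
// order loads, so it is lowered to "dmb ishld" (barrier option 0x9); every
// other fence ordering gets the full "dmb ish" (0xb).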
def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
//===----------------------------------
// Atomic loads
//===----------------------------------
// When they're actually atomic, only one addressing mode (GPR64sp) is
// supported, but when they're relaxed and anything can be used, all the
// standard modes would be valid and may give efficiency gains.
// An atomic load operation that actually needs acquire semantics.
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
assert(Ordering != AcquireRelease && "unexpected load ordering");
return Ordering == Acquire || Ordering == SequentiallyConsistent;
}]>;
// An atomic load operation that does not need either acquire or release
// semantics.
class relaxed_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
return Ordering == Monotonic || Ordering == Unordered;
}]>;
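// For example, an IR "load atomic i32, i32* %p acquire" selects LDARW below,
// while a monotonic (relaxed) i32 load may use any of the ordinary LDR forms
// (register-offset, scaled-immediate or unscaled-immediate).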
// 8-bit loads
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_8> ro_indexed8:$addr),
(LDRBBro ro_indexed8:$addr)>;
def : Pat<(relaxed_load<atomic_load_8> am_indexed8:$addr),
(LDRBBui am_indexed8:$addr)>;
def : Pat<(relaxed_load<atomic_load_8> am_unscaled8:$addr),
(LDURBBi am_unscaled8:$addr)>;
// 16-bit loads
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_16> ro_indexed16:$addr),
(LDRHHro ro_indexed16:$addr)>;
def : Pat<(relaxed_load<atomic_load_16> am_indexed16:$addr),
(LDRHHui am_indexed16:$addr)>;
def : Pat<(relaxed_load<atomic_load_16> am_unscaled16:$addr),
(LDURHHi am_unscaled16:$addr)>;
// 32-bit loads
def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_32> ro_indexed32:$addr),
(LDRWro ro_indexed32:$addr)>;
def : Pat<(relaxed_load<atomic_load_32> am_indexed32:$addr),
(LDRWui am_indexed32:$addr)>;
def : Pat<(relaxed_load<atomic_load_32> am_unscaled32:$addr),
(LDURWi am_unscaled32:$addr)>;
// 64-bit loads
def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_64> ro_indexed64:$addr),
(LDRXro ro_indexed64:$addr)>;
def : Pat<(relaxed_load<atomic_load_64> am_indexed64:$addr),
(LDRXui am_indexed64:$addr)>;
def : Pat<(relaxed_load<atomic_load_64> am_unscaled64:$addr),
(LDURXi am_unscaled64:$addr)>;
//===----------------------------------
// Atomic stores
//===----------------------------------
// When they're actually atomic, only one addressing mode (GPR64sp) is
// supported, but when they're relaxed and anything can be used, all the
// standard modes would be valid and may give efficiency gains.
// A store operation that actually needs release semantics.
class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
assert(Ordering != AcquireRelease && "unexpected store ordering");
return Ordering == Release || Ordering == SequentiallyConsistent;
}]>;
// An atomic store operation that doesn't actually need to be atomic on ARM64.
class relaxed_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
return Ordering == Monotonic || Ordering == Unordered;
}]>;
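// Correspondingly, "store atomic i32 %v, i32* %p release" becomes STLRW, and
// a monotonic store is free to use the plain STR/STUR forms below.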
// 8-bit stores
def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
(STLRB GPR32:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_8> ro_indexed8:$ptr, GPR32:$val),
(STRBBro GPR32:$val, ro_indexed8:$ptr)>;
def : Pat<(relaxed_store<atomic_store_8> am_indexed8:$ptr, GPR32:$val),
(STRBBui GPR32:$val, am_indexed8:$ptr)>;
def : Pat<(relaxed_store<atomic_store_8> am_unscaled8:$ptr, GPR32:$val),
(STURBBi GPR32:$val, am_unscaled8:$ptr)>;
// 16-bit stores
def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
(STLRH GPR32:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_16> ro_indexed16:$ptr, GPR32:$val),
(STRHHro GPR32:$val, ro_indexed16:$ptr)>;
def : Pat<(relaxed_store<atomic_store_16> am_indexed16:$ptr, GPR32:$val),
(STRHHui GPR32:$val, am_indexed16:$ptr)>;
def : Pat<(relaxed_store<atomic_store_16> am_unscaled16:$ptr, GPR32:$val),
(STURHHi GPR32:$val, am_unscaled16:$ptr)>;
// 32-bit stores
def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
(STLRW GPR32:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_32> ro_indexed32:$ptr, GPR32:$val),
(STRWro GPR32:$val, ro_indexed32:$ptr)>;
def : Pat<(relaxed_store<atomic_store_32> am_indexed32:$ptr, GPR32:$val),
(STRWui GPR32:$val, am_indexed32:$ptr)>;
def : Pat<(relaxed_store<atomic_store_32> am_unscaled32:$ptr, GPR32:$val),
(STURWi GPR32:$val, am_unscaled32:$ptr)>;
// 64-bit stores
def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
(STLRX GPR64:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_64> ro_indexed64:$ptr, GPR64:$val),
(STRXro GPR64:$val, ro_indexed64:$ptr)>;
def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val),
(STRXui GPR64:$val, am_indexed64:$ptr)>;
def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val),
(STURXi GPR64:$val, am_unscaled64:$ptr)>;
//===----------------------------------
// Atomic read-modify-write operations
//===----------------------------------
// More complicated operations need lots of C++ support, so we just create
// skeletons here for the C++ code to refer to.
let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
multiclass AtomicSizes {
def _I8 : Pseudo<(outs GPR32:$dst),
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I16 : Pseudo<(outs GPR32:$dst),
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I32 : Pseudo<(outs GPR32:$dst),
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
def _I64 : Pseudo<(outs GPR64:$dst),
(ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
(ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi,
i32imm:$ordering), []>;
}
}
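// Each "defm FOO : AtomicSizes" below expands to FOO_I8, FOO_I16, FOO_I32,
// FOO_I64 and FOO_I128 pseudo-instructions; since usesCustomInserter is set,
// they are expanded later by EmitInstrWithCustomInserter (see ARM64ISelLowering).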
defm ATOMIC_LOAD_ADD : AtomicSizes;
defm ATOMIC_LOAD_SUB : AtomicSizes;
defm ATOMIC_LOAD_AND : AtomicSizes;
defm ATOMIC_LOAD_OR : AtomicSizes;
defm ATOMIC_LOAD_XOR : AtomicSizes;
defm ATOMIC_LOAD_NAND : AtomicSizes;
defm ATOMIC_SWAP : AtomicSizes;
let Defs = [CPSR] in {
// These operations need a CMP to calculate the correct value
defm ATOMIC_LOAD_MIN : AtomicSizes;
defm ATOMIC_LOAD_MAX : AtomicSizes;
defm ATOMIC_LOAD_UMIN : AtomicSizes;
defm ATOMIC_LOAD_UMAX : AtomicSizes;
}
class AtomicCmpSwap<RegisterClass GPRData>
: Pseudo<(outs GPRData:$dst),
(ins GPR64sp:$ptr, GPRData:$old, GPRData:$new,
i32imm:$ordering), []> {
let usesCustomInserter = 1;
let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
let Defs = [CPSR];
}
def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
def ATOMIC_CMP_SWAP_I128
: Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
(ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi,
GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> {
let usesCustomInserter = 1;
let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
let Defs = [CPSR];
}
//===----------------------------------
// Low-level exclusive operations
//===----------------------------------
// Load-exclusives.
def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;
def : Pat<(ldxr_1 am_noindex:$addr),
(SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
def : Pat<(ldxr_2 am_noindex:$addr),
(SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
def : Pat<(ldxr_4 am_noindex:$addr),
(SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
def : Pat<(ldxr_8 am_noindex:$addr), (LDXRX am_noindex:$addr)>;
def : Pat<(and (ldxr_1 am_noindex:$addr), 0xff),
(SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff),
(SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff),
(SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
// Store-exclusives.
def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
(int_arm64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
(int_arm64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
(int_arm64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
(int_arm64_stxr node:$val, node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;
def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr),
(STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr),
(STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
def : Pat<(stxr_4 GPR64:$val, am_noindex:$addr),
(STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
def : Pat<(stxr_8 GPR64:$val, am_noindex:$addr),
(STXRX GPR64:$val, am_noindex:$addr)>;
def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr),
(STXRB GPR32:$val, am_noindex:$addr)>;
def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr),
(STXRH GPR32:$val, am_noindex:$addr)>;
def : Pat<(stxr_4 (zext GPR32:$val), am_noindex:$addr),
(STXRW GPR32:$val, am_noindex:$addr)>;
def : Pat<(stxr_1 (and GPR64:$val, 0xff), am_noindex:$addr),
(STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr),
(STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr),
(STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
// And clear exclusive.
def : Pat<(int_arm64_clrex), (CLREX 0xf)>;

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,223 @@
//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_ARM64INSTRINFO_H
#define LLVM_TARGET_ARM64INSTRINFO_H
#include "ARM64.h"
#include "ARM64RegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "ARM64GenInstrInfo.inc"
namespace llvm {
class ARM64Subtarget;
class ARM64TargetMachine;
class ARM64InstrInfo : public ARM64GenInstrInfo {
// Reserve bits in the MachineMemOperand target hint flags, starting at 1.
// They will be shifted into MOTargetHintStart when accessed.
enum TargetMemOperandFlags {
MOSuppressPair = 1
};
const ARM64RegisterInfo RI;
const ARM64Subtarget &Subtarget;
public:
explicit ARM64InstrInfo(const ARM64Subtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
virtual const ARM64RegisterInfo &getRegisterInfo() const { return RI; }
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DstReg, unsigned &SubIdx) const;
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
/// \brief Does this instruction set its full destination register to zero?
bool isGPRZero(const MachineInstr *MI) const;
/// \brief Does this instruction rename a GPR without modifying bits?
bool isGPRCopy(const MachineInstr *MI) const;
/// \brief Does this instruction rename an FPR without modifying bits?
bool isFPRCopy(const MachineInstr *MI) const;
/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool isScaledAddr(const MachineInstr *MI) const;
/// Return true if pairing the given load or store is hinted to be
/// unprofitable.
bool isLdStPairSuppressed(const MachineInstr *MI) const;
/// Hint that pairing the given load or store is unprofitable.
void suppressLdStPair(MachineInstr *MI) const;
virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
unsigned &Offset,
const TargetRegisterInfo *TRI) const;
virtual bool enableClusterLoads() const { return true; }
virtual bool shouldClusterLoads(MachineInstr *FirstLdSt,
MachineInstr *SecondLdSt,
unsigned NumLoads) const;
virtual bool shouldScheduleAdjacent(MachineInstr *First,
MachineInstr *Second) const;
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *MDPtr,
DebugLoc DL) const;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const;
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
virtual MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
virtual bool
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
virtual bool canInsertSelect(const MachineBasicBlock &,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned, unsigned, int &, int &, int &) const;
virtual void insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DstReg,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned TrueReg, unsigned FalseReg) const;
virtual void getNoopForMachoTarget(MCInst &NopInst) const;
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
unsigned &SrcReg2, int &CmpMask,
int &CmpValue) const;
/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,
const SmallVectorImpl<MachineOperand> &Cond) const;
};
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
const ARM64InstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
bool SetCPSR = false);
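// A hypothetical call site (illustrative only), e.g. adjusting SP downward by
// 16 bytes in the prologue:
//   emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -16, TII,
//                   MachineInstr::FrameSetup);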
/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARM64InstrInfo *TII);
/// \brief Use to report the frame offset status in isARM64FrameOffsetLegal.
enum ARM64FrameOffsetStatus {
ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
ARM64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
ARM64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
};
/// \brief Check if the @p Offset is a valid frame offset for @p MI.
/// The returned value reports the validity of the frame offset for @p MI.
/// It uses the values defined by ARM64FrameOffsetStatus for that.
/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & ARM64FrameOffsetIsLegal, @p Offset can completely be
/// rewritten in @p MI.
/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the
/// amount that is off the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
bool *OutUseUnscaledOp = NULL,
unsigned *OutUnscaledOp = NULL,
int *EmittableOffset = NULL);
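// A typical caller pattern (illustrative sketch, not part of this patch):
//   int Emittable; unsigned UnscaledOp; bool UseUnscaled;
//   int Status = isARM64FrameOffsetLegal(MI, Offset, &UseUnscaled, &UnscaledOp,
//                                        &Emittable);
//   if (Status & ARM64FrameOffsetCanUpdate) {
//     // Fold Emittable into MI (switching to UnscaledOp if UseUnscaled) and
//     // materialise whatever remains in Offset with a separate add/sub.
//   }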
static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; }
static inline bool isCondBranchOpcode(int Opc) {
switch (Opc) {
case ARM64::Bcc:
case ARM64::CBZW:
case ARM64::CBZX:
case ARM64::CBNZW:
case ARM64::CBNZX:
case ARM64::TBZ:
case ARM64::TBNZ:
return true;
default:
return false;
}
}
static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; }
} // end namespace llvm
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,950 @@
//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. pass --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-ldst-opt"
#include "ARM64InstrInfo.h"
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
/// ARM64LoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldp / stp instructions.
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store pair instructions formed from unscaled ops");
static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true),
cl::Hidden);
static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
cl::Hidden);
// Placeholder while testing unscaled load/store combining
static cl::opt<bool>
EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden,
cl::desc("Allow ARM64 unscaled load/store combining"),
cl::init(true));
namespace {
struct ARM64LoadStoreOpt : public MachineFunctionPass {
static char ID;
ARM64LoadStoreOpt() : MachineFunctionPass(ID) {}
const ARM64InstrInfo *TII;
const TargetRegisterInfo *TRI;
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
// If a matching instruction is found, mergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
bool &mergeForward,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
// If mergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired, bool mergeForward);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan forwards.
MachineBasicBlock::iterator
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
int Value);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan backwards.
MachineBasicBlock::iterator
findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
// Merge a pre-index base register update into a ld/st instruction.
MachineBasicBlock::iterator
mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update);
// Merge a post-index base register update into a ld/st instruction.
MachineBasicBlock::iterator
mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update);
bool optimizeBlock(MachineBasicBlock &MBB);
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM64 load / store optimization pass";
}
private:
int getMemSize(MachineInstr *MemMI);
};
char ARM64LoadStoreOpt::ID = 0;
}
static bool isUnscaledLdst(unsigned Opc) {
switch (Opc) {
default:
return false;
case ARM64::STURSi:
return true;
case ARM64::STURDi:
return true;
case ARM64::STURQi:
return true;
case ARM64::STURWi:
return true;
case ARM64::STURXi:
return true;
case ARM64::LDURSi:
return true;
case ARM64::LDURDi:
return true;
case ARM64::LDURQi:
return true;
case ARM64::LDURWi:
return true;
case ARM64::LDURXi:
return true;
}
}
// Size in bytes of the data moved by a scaled or unscaled load or store.
int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
switch (MemMI->getOpcode()) {
default:
llvm_unreachable("Opcode has has unknown size!");
case ARM64::STRSui:
case ARM64::STURSi:
return 4;
case ARM64::STRDui:
case ARM64::STURDi:
return 8;
case ARM64::STRQui:
case ARM64::STURQi:
return 16;
case ARM64::STRWui:
case ARM64::STURWi:
return 4;
case ARM64::STRXui:
case ARM64::STURXi:
return 8;
case ARM64::LDRSui:
case ARM64::LDURSi:
return 4;
case ARM64::LDRDui:
case ARM64::LDURDi:
return 8;
case ARM64::LDRQui:
case ARM64::LDURQi:
return 16;
case ARM64::LDRWui:
case ARM64::LDURWi:
return 4;
case ARM64::LDRXui:
case ARM64::LDURXi:
return 8;
}
}
static unsigned getMatchingPairOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no pairwise equivalent!");
case ARM64::STRSui:
case ARM64::STURSi:
return ARM64::STPSi;
case ARM64::STRDui:
case ARM64::STURDi:
return ARM64::STPDi;
case ARM64::STRQui:
case ARM64::STURQi:
return ARM64::STPQi;
case ARM64::STRWui:
case ARM64::STURWi:
return ARM64::STPWi;
case ARM64::STRXui:
case ARM64::STURXi:
return ARM64::STPXi;
case ARM64::LDRSui:
case ARM64::LDURSi:
return ARM64::LDPSi;
case ARM64::LDRDui:
case ARM64::LDURDi:
return ARM64::LDPDi;
case ARM64::LDRQui:
case ARM64::LDURQi:
return ARM64::LDPQi;
case ARM64::LDRWui:
case ARM64::LDURWi:
return ARM64::LDPWi;
case ARM64::LDRXui:
case ARM64::LDURXi:
return ARM64::LDPXi;
}
}
static unsigned getPreIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no pre-indexed equivalent!");
case ARM64::STRSui: return ARM64::STRSpre;
case ARM64::STRDui: return ARM64::STRDpre;
case ARM64::STRQui: return ARM64::STRQpre;
case ARM64::STRWui: return ARM64::STRWpre;
case ARM64::STRXui: return ARM64::STRXpre;
case ARM64::LDRSui: return ARM64::LDRSpre;
case ARM64::LDRDui: return ARM64::LDRDpre;
case ARM64::LDRQui: return ARM64::LDRQpre;
case ARM64::LDRWui: return ARM64::LDRWpre;
case ARM64::LDRXui: return ARM64::LDRXpre;
}
}
static unsigned getPostIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no post-indexed wise equivalent!");
case ARM64::STRSui:
return ARM64::STRSpost;
case ARM64::STRDui:
return ARM64::STRDpost;
case ARM64::STRQui:
return ARM64::STRQpost;
case ARM64::STRWui:
return ARM64::STRWpost;
case ARM64::STRXui:
return ARM64::STRXpost;
case ARM64::LDRSui:
return ARM64::LDRSpost;
case ARM64::LDRDui:
return ARM64::LDRDpost;
case ARM64::LDRQui:
return ARM64::LDRQpost;
case ARM64::LDRWui:
return ARM64::LDRWpost;
case ARM64::LDRXui:
return ARM64::LDRXpost;
}
}
MachineBasicBlock::iterator
ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
bool mergeForward) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way we merge, the iterator will be invalidated,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == Paired)
++NextI;
bool IsUnscaled = isUnscaledLdst(I->getOpcode());
int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1;
unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
// Insert our new paired instruction after whichever of the paired
// instructions mergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I;
// mergeForward also determines which instruction we copy the base register
// operand from, so that the register flags stay compatible with the input code.
MachineOperand &BaseRegOp =
mergeForward ? Paired->getOperand(1) : I->getOperand(1);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
if (I->getOperand(2).getImm() ==
Paired->getOperand(2).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
} else {
RtMI = I;
Rt2MI = Paired;
}
// Handle Unscaled
int OffsetImm = RtMI->getOperand(2).getImm();
if (IsUnscaled && EnableARM64UnscaledMemOp)
OffsetImm /= OffsetStride;
// Construct the new instruction.
MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
I->getDebugLoc(), TII->get(NewOpc))
.addOperand(RtMI->getOperand(0))
.addOperand(Rt2MI->getOperand(0))
.addOperand(BaseRegOp)
.addImm(OffsetImm);
(void)MIB;
// FIXME: Do we need/want to copy the mem operands from the source
// instructions? Probably. What uses them after this?
DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
DEBUG(Paired->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
DEBUG(((MachineInstr *)MIB)->print(dbgs()));
DEBUG(dbgs() << "\n");
// Erase the old instructions.
I->eraseFromParent();
Paired->eraseFromParent();
return NextI;
}
/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
BitVector &UsedRegs,
const TargetRegisterInfo *TRI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isRegMask())
ModifiedRegs.setBitsNotInMask(MO.getRegMask());
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (MO.isDef()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
ModifiedRegs.set(*AI);
} else {
assert(MO.isUse() && "Reg operand not a def and not a use?!?");
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
UsedRegs.set(*AI);
}
}
}
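// Paired instructions use a 7-bit signed, scaled immediate, i.e. element
// offsets in [-64, 63]. For example, scaled X-register loads at element
// offsets 63 and 64 can still be paired (the pair is addressed from the lower
// offset, 63), but offsets 64 and 65 cannot.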
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
if (!IsUnscaled && (Offset > 63 || Offset < -64))
return false;
if (IsUnscaled) {
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
int elemOffset = Offset / OffsetStride;
if (elemOffset > 63 || elemOffset < -64)
return false;
}
return true;
}
// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint_64t to int when
// using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
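// For example, alignTo(7, 8) == 8 and alignTo(16, 8) == 16.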
static int alignTo(int Num, int PowOf2) {
return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool &mergeForward, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
++MBBI;
int Opc = FirstMI->getOpcode();
bool mayLoad = FirstMI->mayLoad();
bool IsUnscaled = isUnscaledLdst(Opc);
unsigned Reg = FirstMI->getOperand(0).getReg();
unsigned BaseReg = FirstMI->getOperand(1).getReg();
int Offset = FirstMI->getOperand(2).getImm();
// Early exit if the first instruction modifies the base register.
// e.g., ldr x0, [x0]
// Early exit if the offset is not possible to match. (6 bits of positive
// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1.)
if (FirstMI->modifiesRegister(BaseReg, TRI))
return E;
int OffsetStride =
IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return E;
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
BitVector ModifiedRegs, UsedRegs;
ModifiedRegs.resize(TRI->getNumRegs());
UsedRegs.resize(TRI->getNumRegs());
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr *MI = MBBI;
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
// optimization by changing how far we scan.
if (MI->isDebugValue())
continue;
// Now that we know this is a real instruction, count it.
++Count;
if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
// check for +1/-1. Make sure to check the new instruction offset is
// actually an immediate and not a symbolic reference destined for
// a relocation.
//
// Pairwise instructions have a 7-bit signed offset field. Single insns
// have a 12-bit unsigned offset field. To be a valid combine, the
// final offset must be in range.
unsigned MIBaseReg = MI->getOperand(1).getReg();
int MIOffset = MI->getOperand(2).getImm();
if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
(Offset + OffsetStride == MIOffset))) {
int MinOffset = Offset < MIOffset ? Offset : MIOffset;
// If this is a volatile load/store that otherwise matched, stop looking
// as something is going on that we don't have enough information to
// safely transform. Similarly, stop if we see a hint to avoid pairs.
if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
return E;
// If the resultant immediate offset of merging these instructions
// is out of range for a pairwise instruction, bail and keep looking.
bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
continue;
}
// If the alignment requirements of the paired (scaled) instruction
// can't express the offset of the unscaled input, bail and keep
// looking.
if (IsUnscaled && EnableARM64UnscaledMemOp &&
(alignTo(MinOffset, OffsetStride) != MinOffset)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
continue;
}
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
if (mayLoad && Reg == MI->getOperand(0).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
continue;
}
// If the Rt of the second instruction was not modified or used between
// the two instructions, we can combine the second into the first.
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
!UsedRegs[MI->getOperand(0).getReg()]) {
mergeForward = false;
return MBBI;
}
// Likewise, if the Rt of the first instruction is not modified or used
// between the two instructions, we can combine the first into the
// second.
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
!UsedRegs[FirstMI->getOperand(0).getReg()]) {
mergeForward = true;
return MBBI;
}
// Unable to combine these instructions due to interference in between.
// Keep looking.
}
}
// If the instruction wasn't a matching load or store, but does (or can)
// modify memory, stop searching, as we don't have alias analysis or
// anything like that to tell us whether the access is tromping on the
// locations we care about. The big one we want to catch is calls.
//
// FIXME: Theoretically, we can do better than that for SP and FP based
// references since we can effectively know where those are touching. It's
// unclear if it's worth the extra code, though. Most paired instructions
// will be sequential, perhaps with a few intervening non-memory related
// instructions.
if (MI->mayStore() || MI->isCall())
return E;
// Likewise, if we're matching a store instruction, we don't want to
// move across a load, as it may be reading the same location.
if (FirstMI->mayStore() && MI->mayLoad())
return E;
// Update modified / uses register lists.
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
// Otherwise, if the base register is modified, we have no match, so
// return early.
if (ModifiedRegs[BaseReg])
return E;
}
return E;
}
MachineBasicBlock::iterator
ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update) {
assert((Update->getOpcode() == ARM64::ADDXri ||
Update->getOpcode() == ARM64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator NextI = I;
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
if (++NextI == Update)
++NextI;
int Value = Update->getOperand(2).getImm();
assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
"Can't merge 1 << 12 offset into pre-indexed load / store");
if (Update->getOpcode() == ARM64::SUBXri)
Value = -Value;
unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB =
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.addOperand(I->getOperand(0))
.addOperand(I->getOperand(1))
.addImm(Value);
(void)MIB;
DEBUG(dbgs() << "Creating pre-indexed load/store.");
DEBUG(dbgs() << " Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
DEBUG(Update->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
DEBUG(((MachineInstr *)MIB)->print(dbgs()));
DEBUG(dbgs() << "\n");
// Erase the old instructions for the block.
I->eraseFromParent();
Update->eraseFromParent();
return NextI;
}
MachineBasicBlock::iterator
ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update) {
assert((Update->getOpcode() == ARM64::ADDXri ||
Update->getOpcode() == ARM64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator NextI = I;
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
if (++NextI == Update)
++NextI;
int Value = Update->getOperand(2).getImm();
assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
"Can't merge 1 << 12 offset into post-indexed load / store");
if (Update->getOpcode() == ARM64::SUBXri)
Value = -Value;
unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB =
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.addOperand(I->getOperand(0))
.addOperand(I->getOperand(1))
.addImm(Value);
(void)MIB;
DEBUG(dbgs() << "Creating post-indexed load/store.");
DEBUG(dbgs() << " Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
DEBUG(Update->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
DEBUG(((MachineInstr *)MIB)->print(dbgs()));
DEBUG(dbgs() << "\n");
// Erase the old instructions for the block.
I->eraseFromParent();
Update->eraseFromParent();
return NextI;
}
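// Return true if MI is an ADDXri/SUBXri of the form "BaseReg = BaseReg +/- imm"
// with no shifted (<< 12) immediate, where the writeback amount fits in a
// signed 9-bit immediate and matches Offset. An Offset of zero acts as a
// wildcard and accepts any amount.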
static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
int Offset) {
switch (MI->getOpcode()) {
default:
break;
case ARM64::SUBXri:
// Negate the offset for a SUB instruction.
Offset *= -1;
// FALLTHROUGH
case ARM64::ADDXri:
// Make sure it's a vanilla immediate operand, not a relocation or
// anything else we can't handle.
if (!MI->getOperand(2).isImm())
break;
// Watch out for 1 << 12 shifted value.
if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm()))
break;
// If the instruction has the base register as source and dest and the
// immediate will fit in a signed 9-bit integer, then we have a match.
if (MI->getOperand(0).getReg() == BaseReg &&
MI->getOperand(1).getReg() == BaseReg &&
MI->getOperand(2).getImm() <= 255 &&
MI->getOperand(2).getImm() >= -256) {
// If we have a non-zero Offset, we check that it matches the amount
// we're adding to the register.
if (!Offset || Offset == MI->getOperand(2).getImm())
return true;
}
break;
}
return false;
}
MachineBasicBlock::iterator
ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
unsigned Limit, int Value) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
const MachineFunction &MF = *MemMI->getParent()->getParent();
unsigned DestReg = MemMI->getOperand(0).getReg();
unsigned BaseReg = MemMI->getOperand(1).getReg();
int Offset = MemMI->getOperand(2).getImm() *
TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
// If the base register overlaps the destination register, we can't
// merge the update.
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
return E;
// Scan forward looking for post-index opportunities.
// Updating instructions can't be formed if the memory insn already
// has an offset other than the value we're looking for.
if (Offset != Value)
return E;
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
BitVector ModifiedRegs, UsedRegs;
ModifiedRegs.resize(TRI->getNumRegs());
UsedRegs.resize(TRI->getNumRegs());
++MBBI;
for (unsigned Count = 0; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
// optimization by changing how far we scan.
if (MI->isDebugValue())
continue;
// Now that we know this is a real instruction, count it.
++Count;
// If we found a match, return it.
if (isMatchingUpdateInsn(MI, BaseReg, Value))
return MBBI;
// Update the status of what the instruction clobbered and used.
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
// Otherwise, if the base register is used or modified, we have no match, so
// return early.
if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
return E;
}
return E;
}
MachineBasicBlock::iterator
ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I,
unsigned Limit) {
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
const MachineFunction &MF = *MemMI->getParent()->getParent();
unsigned DestReg = MemMI->getOperand(0).getReg();
unsigned BaseReg = MemMI->getOperand(1).getReg();
int Offset = MemMI->getOperand(2).getImm();
unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
if (MBBI == B || Offset != 0)
return E;
// If the base register overlaps the destination register, we can't
// merge the update.
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
return E;
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
BitVector ModifiedRegs, UsedRegs;
ModifiedRegs.resize(TRI->getNumRegs());
UsedRegs.resize(TRI->getNumRegs());
--MBBI;
for (unsigned Count = 0; MBBI != B; --MBBI) {
MachineInstr *MI = MBBI;
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
// optimization by changing how far we scan.
if (MI->isDebugValue())
continue;
// Now that we know this is a real instruction, count it.
++Count;
// If we found a match, return it.
if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
return MBBI;
// Update the status of what the instruction clobbered and used.
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
// Otherwise, if the base register is used or modified, we have no match, so
// return early.
if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
return E;
}
return E;
}
bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
bool Modified = false;
// Two transformations to do here:
// 1) Find loads and stores that can be merged into a single load or store
// pair instruction.
// e.g.,
// ldr x0, [x2]
// ldr x1, [x2, #8]
// ; becomes
// ldp x0, x1, [x2]
// 2) Find base register updates that can be merged into the load or store
// as a base-reg writeback.
// e.g.,
// ldr x0, [x2]
// add x2, x2, #4
// ; becomes
// ldr x0, [x2], #4
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
MachineInstr *MI = MBBI;
switch (MI->getOpcode()) {
default:
// Just move on to the next instruction.
++MBBI;
break;
case ARM64::STRSui:
case ARM64::STRDui:
case ARM64::STRQui:
case ARM64::STRXui:
case ARM64::STRWui:
case ARM64::LDRSui:
case ARM64::LDRDui:
case ARM64::LDRQui:
case ARM64::LDRXui:
case ARM64::LDRWui:
// do the unscaled versions as well
case ARM64::STURSi:
case ARM64::STURDi:
case ARM64::STURQi:
case ARM64::STURWi:
case ARM64::STURXi:
case ARM64::LDURSi:
case ARM64::LDURDi:
case ARM64::LDURQi:
case ARM64::LDURWi:
case ARM64::LDURXi: {
// If this is a volatile load/store, don't mess with it.
if (MI->hasOrderedMemoryRef()) {
++MBBI;
break;
}
// Make sure this is a reg+imm (as opposed to an address reloc).
if (!MI->getOperand(2).isImm()) {
++MBBI;
break;
}
// Check if this load/store has a hint to avoid pair formation.
// MachineMemOperands hints are set by the ARM64StorePairSuppress pass.
if (TII->isLdStPairSuppressed(MI)) {
++MBBI;
break;
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
bool mergeForward = false;
MachineBasicBlock::iterator Paired =
findMatchingInsn(MBBI, mergeForward, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
MBBI = mergePairedInsns(MBBI, Paired, mergeForward);
Modified = true;
++NumPairCreated;
if (isUnscaledLdst(MI->getOpcode()))
++NumUnscaledPairCreated;
break;
}
++MBBI;
break;
}
// FIXME: Do the other instructions.
}
}
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
MachineInstr *MI = MBBI;
// Do update merging. It's simpler to keep this separate from the above
// switch, though not strictly necessary.
int Opc = MI->getOpcode();
switch (Opc) {
default:
// Just move on to the next instruction.
++MBBI;
break;
case ARM64::STRSui:
case ARM64::STRDui:
case ARM64::STRQui:
case ARM64::STRXui:
case ARM64::STRWui:
case ARM64::LDRSui:
case ARM64::LDRDui:
case ARM64::LDRQui:
case ARM64::LDRXui:
case ARM64::LDRWui:
// do the unscaled versions as well
case ARM64::STURSi:
case ARM64::STURDi:
case ARM64::STURQi:
case ARM64::STURWi:
case ARM64::STURXi:
case ARM64::LDURSi:
case ARM64::LDURDi:
case ARM64::LDURQi:
case ARM64::LDURWi:
case ARM64::LDURXi: {
// Make sure this is a reg+imm (as opposed to an address reloc).
if (!MI->getOperand(2).isImm()) {
++MBBI;
break;
}
// Look ahead up to ScanLimit instructions for a mergable instruction.
MachineBasicBlock::iterator Update =
findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
if (Update != E) {
// Merge the update into the ld/st.
MBBI = mergePostIdxUpdateInsn(MBBI, Update);
Modified = true;
++NumPostFolded;
break;
}
// Don't know how to handle pre/post-index versions, so move to the next
// instruction.
if (isUnscaledLdst(Opc)) {
++MBBI;
break;
}
// Look back to try to find a pre-index instruction. For example,
// add x0, x0, #8
// ldr x1, [x0]
// merged into:
// ldr x1, [x0, #8]!
Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
if (Update != E) {
// Merge the update into the ld/st.
MBBI = mergePreIdxUpdateInsn(MBBI, Update);
Modified = true;
++NumPreFolded;
break;
}
// Look forward to try to find a post-index instruction. For example,
// ldr x1, [x0, #64]
// add x0, x0, #64
// merged into:
// ldr x1, [x0], #64
// The immediate in the load/store is scaled by the size of the register
// being loaded. The immediate in the add we're looking for,
// however, is not, so adjust here.
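// e.g., for "ldr x1, [x0, #64]" the encoded immediate operand is 8 (64 bytes
// scaled by the 8-byte access size), while the matching "add x0, x0, #64"
// uses the raw byte count 64.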
int Value = MI->getOperand(2).getImm() *
TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
->getSize();
Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
if (Update != E) {
// Merge the update into the ld/st.
MBBI = mergePreIdxUpdateInsn(MBBI, Update);
Modified = true;
++NumPreFolded;
break;
}
// Nothing found. Just move to the next instruction.
++MBBI;
break;
}
// FIXME: Do the other instructions.
}
}
return Modified;
}
bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// Early exit if pass disabled.
if (!DoLoadStoreOpt)
return false;
const TargetMachine &TM = Fn.getTarget();
TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
TRI = TM.getRegisterInfo();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock &MBB = *MFI;
Modified |= optimizeBlock(MBB);
}
return Modified;
}
// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?
/// createARM64LoadStoreOptimizationPass - returns an instance of the
/// load/store optimization pass.
FunctionPass *llvm::createARM64LoadStoreOptimizationPass() {
return new ARM64LoadStoreOpt();
}

View File

@ -0,0 +1,201 @@
//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower ARM64 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//
#include "ARM64MCInstLower.h"
#include "MCTargetDesc/ARM64BaseInfo.h"
#include "MCTargetDesc/ARM64MCExpr.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang,
AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
MCSymbol *
ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
return Printer.getSymbol(MO.getGlobal());
}
MCSymbol *
ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
}
MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
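// The chosen variant kinds drive Darwin's Mach-O relocations; e.g., a
// GOT-relative global is typically materialized as
//   adrp x0, _g@GOTPAGE
//   ldr  x0, [x0, _g@GOTPAGEOFF]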
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) {
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
RefKind = MCSymbolRefExpr::VK_GOTPAGE;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
ARM64II::MO_PAGEOFF)
RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
else
assert(0 && "Unexpected target flags with MO_GOT on GV operand");
} else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) {
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
ARM64II::MO_PAGEOFF)
RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF;
else
llvm_unreachable("Unexpected target flags with MO_TLS on GV operand");
} else {
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
RefKind = MCSymbolRefExpr::VK_PAGE;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
ARM64II::MO_PAGEOFF)
RefKind = MCSymbolRefExpr::VK_PAGEOFF;
}
const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(
Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
return MCOperand::CreateExpr(Expr);
}
MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
MCSymbol *Sym) const {
uint32_t RefFlags = 0;
if (MO.getTargetFlags() & ARM64II::MO_GOT)
RefFlags |= ARM64MCExpr::VK_GOT;
else if (MO.getTargetFlags() & ARM64II::MO_TLS) {
TLSModel::Model Model;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
case TLSModel::InitialExec:
RefFlags |= ARM64MCExpr::VK_GOTTPREL;
break;
case TLSModel::LocalExec:
RefFlags |= ARM64MCExpr::VK_TPREL;
break;
case TLSModel::LocalDynamic:
RefFlags |= ARM64MCExpr::VK_DTPREL;
break;
case TLSModel::GeneralDynamic:
RefFlags |= ARM64MCExpr::VK_TLSDESC;
break;
}
} else {
// No modifier means this is a generic reference, classified as absolute for
// the cases where it matters (:abs_g0: etc).
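// e.g., "movz x0, #:abs_g0:sym" materializes the low 16 bits of sym's
// absolute address.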
RefFlags |= ARM64MCExpr::VK_ABS;
}
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
RefFlags |= ARM64MCExpr::VK_PAGE;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF)
RefFlags |= ARM64MCExpr::VK_PAGEOFF;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3)
RefFlags |= ARM64MCExpr::VK_G3;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2)
RefFlags |= ARM64MCExpr::VK_G2;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1)
RefFlags |= ARM64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0)
RefFlags |= ARM64MCExpr::VK_G0;
if (MO.getTargetFlags() & ARM64II::MO_NC)
RefFlags |= ARM64MCExpr::VK_NC;
const MCExpr *Expr =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(
Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
ARM64MCExpr::VariantKind RefKind;
RefKind = static_cast<ARM64MCExpr::VariantKind>(RefFlags);
Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx);
return MCOperand::CreateExpr(Expr);
}
MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
if (TargetTriple.isOSDarwin())
return lowerSymbolOperandDarwin(MO, Sym);
assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target");
return lowerSymbolOperandELF(MO, Sym);
}
bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) const {
switch (MO.getType()) {
default:
assert(0 && "unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit())
return false;
MCOp = MCOperand::CreateReg(MO.getReg());
break;
case MachineOperand::MO_RegisterMask:
// Regmasks are like implicit defs.
return false;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::CreateExpr(
MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
break;
case MachineOperand::MO_ExternalSymbol:
MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
break;
case MachineOperand::MO_JumpTableIndex:
MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
break;
case MachineOperand::MO_ConstantPoolIndex:
MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
break;
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(
MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
break;
}
return true;
}
void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
if (lowerOperand(MI->getOperand(i), MCOp))
OutMI.addOperand(MCOp);
}
}

View File

@ -0,0 +1,52 @@
//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64_MCINSTLOWER_H
#define ARM64_MCINSTLOWER_H
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
class AsmPrinter;
class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;
/// ARM64MCInstLower - This class is used to lower a MachineInstr
/// into an MCInst.
class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower {
MCContext &Ctx;
AsmPrinter &Printer;
Triple TargetTriple;
public:
ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCOperand lowerSymbolOperandDarwin(const MachineOperand &MO,
MCSymbol *Sym) const;
MCOperand lowerSymbolOperandELF(const MachineOperand &MO,
MCSymbol *Sym) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
};
}
#endif

View File

@ -0,0 +1,126 @@
//===- ARM64MachineFunctionInfo.h - ARM64 machine function info -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares ARM64-specific per-machine-function information.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64MACHINEFUNCTIONINFO_H
#define ARM64MACHINEFUNCTIONINFO_H
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
namespace llvm {
/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private ARM64-specific information for each MachineFunction.
class ARM64FunctionInfo : public MachineFunctionInfo {
/// HasStackFrame - True if this function has a stack frame. Set by
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
/// \brief Amount of stack frame size, not including callee-saved registers.
unsigned LocalStackSize;
/// \brief Number of TLS accesses using the special (combinable)
/// _TLS_MODULE_BASE_ symbol.
unsigned NumLocalDynamicTLSAccesses;
/// \brief FrameIndex for start of varargs area for arguments passed on the
/// stack.
int VarArgsStackIndex;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// general purpose registers.
int VarArgsGPRIndex;
/// \brief Size of the varargs area for arguments passed in general purpose
/// registers.
unsigned VarArgsGPRSize;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// floating-point registers.
int VarArgsFPRIndex;
/// \brief Size of the varargs area for arguments passed in floating-point
/// registers.
unsigned VarArgsFPRSize;
public:
ARM64FunctionInfo()
: HasStackFrame(false), LocalStackSize(0), NumLocalDynamicTLSAccesses(0),
VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0),
VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
explicit ARM64FunctionInfo(MachineFunction &MF)
: HasStackFrame(false), LocalStackSize(0), NumLocalDynamicTLSAccesses(0),
VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0),
VarArgsFPRIndex(0), VarArgsFPRSize(0) {
(void)MF;
}
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
unsigned getLocalStackSize() const { return LocalStackSize; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
return NumLocalDynamicTLSAccesses;
}
int getVarArgsStackIndex() const { return VarArgsStackIndex; }
void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
int getVarArgsGPRIndex() const { return VarArgsGPRIndex; }
void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; }
unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; }
void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; }
int getVarArgsFPRIndex() const { return VarArgsFPRIndex; }
void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; }
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions;
const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
// Shortcuts for LOH related types.
typedef LOHDirective<const MachineInstr> MILOHDirective;
typedef MILOHDirective::LOHArgs MILOHArgs;
typedef LOHContainer<const MachineInstr> MILOHContainer;
typedef MILOHContainer::LOHDirectives MILOHDirectives;
const MILOHContainer &getLOHContainer() const { return LOHContainerSet; }
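// A Linker Optimization Hint (LOH) records a short instruction sequence, such
// as an adrp followed by an add or ldr that computes a symbol address, so the
// linker can simplify the sequence once the final symbol layout is known.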
/// Add a LOH directive of this @p Kind and this @p Args.
void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) {
LOHContainerSet.addDirective(Kind, Args);
for (MILOHArgs::const_iterator It = Args.begin(), EndIt = Args.end();
It != EndIt; ++It)
LOHRelated.insert(*It);
}
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
};
} // End llvm namespace
#endif // ARM64MACHINEFUNCTIONINFO_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,588 @@
//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM64PromoteConstant pass, which promotes constants
// to global variables when that is likely to be more efficient.
// Currently only vector-related types (i.e., constant vectors, arrays of
// constant vectors, constant structures with a constant vector field, etc.)
// are promoted to global variables.
// Constant vectors are likely to be lowered into the target constant pool
// during instruction selection anyway, so the access stays the same (a memory
// load), but aggregate types are no longer split into a separate constant pool
// access for each field.
// A bonus side effect is that the created globals may be merged by the global
// merge pass.
//
// FIXME: This pass may be useful for other targets too.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-promote-const"
#include "ARM64.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
// Stress testing mode - disable heuristics.
static cl::opt<bool> Stress("arm64-stress-promote-const", cl::Hidden,
cl::desc("Promote all vector constants"));
STATISTIC(NumPromoted, "Number of promoted constants");
STATISTIC(NumPromotedUses, "Number of promoted constants uses");
//===----------------------------------------------------------------------===//
// ARM64PromoteConstant
//===----------------------------------------------------------------------===//
namespace {
/// Promotes interesting constant into global variables.
/// The motivating example is:
/// static const uint16_t TableA[32] = {
/// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768,
/// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215,
/// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846,
/// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725,
/// };
///
/// uint8x16x4_t LoadStatic(void) {
/// uint8x16x4_t ret;
/// ret.val[0] = vld1q_u16(TableA + 0);
/// ret.val[1] = vld1q_u16(TableA + 8);
/// ret.val[2] = vld1q_u16(TableA + 16);
/// ret.val[3] = vld1q_u16(TableA + 24);
/// return ret;
/// }
///
/// The constants in that example are folded into their uses. Thus, 4 different
/// constants are created.
/// As their type is a vector, the cheapest way to materialize them is to load
/// them from memory.
/// Therefore the final assembly ends up with 4 different loads.
/// With this pass enabled, only one load is issued for the constants.
class ARM64PromoteConstant : public ModulePass {
public:
static char ID;
ARM64PromoteConstant() : ModulePass(ID) {}
virtual const char *getPassName() const { return "ARM64 Promote Constant"; }
/// Iterate over the functions and promote the interesting constants into
/// global variables with module scope.
bool runOnModule(Module &M) {
DEBUG(dbgs() << getPassName() << '\n');
bool Changed = false;
for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
++IFn) {
Changed |= runOnFunction(*IFn);
}
return Changed;
}
private:
/// Look for interesting constants used within the given function.
/// Promote them into global variables and load those globals within the
/// related function, so that the number of inserted loads is minimal.
bool runOnFunction(Function &F);
// This transformation requires dominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
/// Type to store a list of Users.
typedef SmallVector<Value::user_iterator, 4> Users;
/// Map an insertion point to all the uses it dominates.
typedef DenseMap<Instruction *, Users> InsertionPoints;
/// Map a function to the insertion points of the loads required for a
/// global variable.
typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
/// Find the closest point that dominates the given Use.
Instruction *findInsertionPoint(Value::user_iterator &Use);
/// Check if the given insertion point is dominated by an existing
/// insertion point.
/// If true, the given use is added to the list of dominated uses for
/// the related existing point.
/// \param NewPt the insertion point to be checked
/// \param UseIt the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instructions in InsertPts belong to the same function
/// \return true if one of the insertion points in InsertPts dominates NewPt,
/// false otherwise
bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
InsertionPoints &InsertPts);
/// Check if the given insertion point can be merged with an existing
/// insertion point in a common dominator.
/// If true, the given use is added to the list of the created insertion
/// point.
/// \param NewPt the insertion point to be checked
/// \param UseIt the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instructions in InsertPts belong to the same function
/// \pre isDominated returns false for the exact same parameters.
/// \return true if there exists an insertion point in InsertPts that could
/// have been merged with NewPt in a common dominator,
/// false otherwise
bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
InsertionPoints &InsertPts);
/// Compute the minimal insertion points that dominate all the interesting
/// uses of Val.
/// Insertion points are grouped per function, and each insertion point
/// contains a list of all the uses it dominates within the related function.
/// \param Val constant to be examined
/// \param InsPtsPerFunc[out] output storage of the analysis
void computeInsertionPoints(Constant *Val,
InsertionPointsPerFunc &InsPtsPerFunc);
/// Insert a definition of a new global variable at each point contained in
/// InsPtsPerFunc and update the related uses (also contained in
/// InsPtsPerFunc).
bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc);
/// Compute the minimal insertion points to dominate all the interesting
/// uses of Val and insert a definition of a new global variable
/// at these points.
/// Also update the uses of Val accordingly.
/// Currently a use of Val is considered interesting if:
/// - Val is not UndefValue
/// - Val is not zeroinitialized
/// - Replacing Val with a load of a global variable is valid.
/// \see shouldConvert for more details
bool computeAndInsertDefinitions(Constant *Val);
/// Promote the given constant into a global variable if it is expected to
/// be profitable.
/// \return true if Cst has been promoted
bool promoteConstant(Constant *Cst);
/// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
/// Append UseIt to this list and delete the entry of IPI in InsertPts.
static void appendAndTransferDominatedUses(Instruction *NewPt,
Value::user_iterator &UseIt,
InsertionPoints::iterator &IPI,
InsertionPoints &InsertPts) {
// Record the dominated use
IPI->second.push_back(UseIt);
// Transfer the dominated uses of IPI to NewPt
// Inserting into the DenseMap may invalidate existing iterators.
// Keep a copy of the key to find the iterator to erase.
Instruction *OldInstr = IPI->first;
InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
// Erase IPI
IPI = InsertPts.find(OldInstr);
InsertPts.erase(IPI);
}
};
} // end anonymous namespace
char ARM64PromoteConstant::ID = 0;
namespace llvm {
void initializeARM64PromoteConstantPass(PassRegistry &);
}
INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const",
"ARM64 Promote Constant Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const",
"ARM64 Promote Constant Pass", false, false)
ModulePass *llvm::createARM64PromoteConstantPass() {
return new ARM64PromoteConstant();
}
/// Check if the given type uses a vector type.
static bool isConstantUsingVectorTy(const Type *CstTy) {
if (CstTy->isVectorTy())
return true;
if (CstTy->isStructTy()) {
for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements();
EltIdx < EndEltIdx; ++EltIdx)
if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx)))
return true;
} else if (CstTy->isArrayTy())
return isConstantUsingVectorTy(CstTy->getArrayElementType());
return false;
}
/// Check if the given use (Instruction + OpIdx) of Cst should be converted into
/// a load of a global variable initialized with Cst.
/// A use should be converted if it is legal to do so.
/// For instance, it is not legal to turn the mask operand of a shuffle vector
/// into a load of a global variable.
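/// e.g., in "%r = shufflevector <4 x i32> %a, <4 x i32> %b,
///                              <4 x i32> <i32 0, i32 4, i32 1, i32 5>"
/// the third operand (the mask) must stay an inline constant.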
static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
unsigned OpIdx) {
// shufflevector instruction expects a const for the mask argument, i.e., the
// third argument. Do not promote this use in that case.
if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2)
return false;
// extractvalue instruction expects a const idx
if (isa<const ExtractValueInst>(Instr) && OpIdx > 0)
return false;
// insertvalue instruction expects a const idx
if (isa<const InsertValueInst>(Instr) && OpIdx > 1)
return false;
if (isa<const AllocaInst>(Instr) && OpIdx > 0)
return false;
// Alignment argument must be constant
if (isa<const LoadInst>(Instr) && OpIdx > 0)
return false;
// Alignment argument must be constant
if (isa<const StoreInst>(Instr) && OpIdx > 1)
return false;
// Index must be constant
if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0)
return false;
// Personality function and filters must be constant.
// Give up on that instruction.
if (isa<const LandingPadInst>(Instr))
return false;
// switch instruction expects constants to compare to
if (isa<const SwitchInst>(Instr))
return false;
// Expected address must be a constant
if (isa<const IndirectBrInst>(Instr))
return false;
// Do not mess with intrinsic
if (isa<const IntrinsicInst>(Instr))
return false;
// Do not mess with inline asm
const CallInst *CI = dyn_cast<const CallInst>(Instr);
if (CI && isa<const InlineAsm>(CI->getCalledValue()))
return false;
return true;
}
/// Check if the given Cst should be converted into
/// a load of a global variable initialized with Cst.
/// A constant should be converted if it is likely that the materialization of
/// the constant will be tricky. Thus, we give up on zero or undef values.
///
/// \todo Currently, accept only vector-related types.
/// We also give up on all simple vector types to keep the existing
/// behavior. Otherwise, we would have to replicate here all the checks done
/// by the lowering of BUILD_VECTOR. By giving up, we lose the potential
/// benefit of merging constants via global merge, and the fact that with this
/// method the same constant is stored only once (versus once per function that
/// uses it with the regular approach, even for floats).
/// Again, the simplest solution would be to promote every constant and
/// rematerialize those that are actually cheap to create.
static bool shouldConvert(const Constant *Cst) {
if (isa<const UndefValue>(Cst))
return false;
// FIXME: In some cases it may be worthwhile to promote a zero-initialized
// constant to memory as well.
// E.g., when materializing a zero value of Cst's type requires more
// instructions than the adrp/add/load sequence, or when that sequence can be
// shared by several instances of Cst.
// Ideally, we could promote this into a global and rematerialize the constant
// if that later turns out to be a bad idea.
if (Cst->isZeroValue())
return false;
if (Stress)
return true;
// FIXME: see function \todo
if (Cst->getType()->isVectorTy())
return false;
return isConstantUsingVectorTy(Cst->getType());
}
Instruction *
ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
// If this user is a phi, the insertion point is in the related
// incoming basic block
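// e.g., for "%p = phi <4 x i16> [ <i16 1, i16 2, i16 3, i16 4>, %bb1 ], ..."
// the load replacing the constant must be emitted at the end of %bb1, not
// next to the phi itself.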
PHINode *PhiInst = dyn_cast<PHINode>(*Use);
Instruction *InsertionPoint;
if (PhiInst)
InsertionPoint =
PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
else
InsertionPoint = dyn_cast<Instruction>(*Use);
assert(InsertionPoint && "User is not an instruction!");
return InsertionPoint;
}
bool ARM64PromoteConstant::isDominated(Instruction *NewPt,
Value::user_iterator &UseIt,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
// Traverse all the existing insertion points and check if one dominates
// NewPt.
for (InsertionPoints::iterator IPI = InsertPts.begin(),
EndIPI = InsertPts.end();
IPI != EndIPI; ++IPI) {
if (NewPt == IPI->first || DT.dominates(IPI->first, NewPt) ||
// When IPI->first is a terminator instruction, DT may think that
// the result is defined on the edge.
// Here we are testing the insertion point, not the definition.
(IPI->first->getParent() != NewPt->getParent() &&
DT.dominates(IPI->first->getParent(), NewPt->getParent()))) {
// No need to insert this point
// Record the dominated use
DEBUG(dbgs() << "Insertion point dominated by:\n");
DEBUG(IPI->first->print(dbgs()));
DEBUG(dbgs() << '\n');
IPI->second.push_back(UseIt);
return true;
}
}
return false;
}
bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
Value::user_iterator &UseIt,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
BasicBlock *NewBB = NewPt->getParent();
// Traverse all the existing insertion points and check if one is dominated by
// NewPt (and thus useless) or can be combined with NewPt into a common
// dominator.
for (InsertionPoints::iterator IPI = InsertPts.begin(),
EndIPI = InsertPts.end();
IPI != EndIPI; ++IPI) {
BasicBlock *CurBB = IPI->first->getParent();
if (NewBB == CurBB) {
// Instructions are in the same block.
// By construction, NewPt dominates the other one.
// Indeed, isDominated returned false with the exact same arguments.
DEBUG(dbgs() << "Merge insertion point with:\n");
DEBUG(IPI->first->print(dbgs()));
DEBUG(dbgs() << "\nat considered insertion point.\n");
appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
return true;
}
// Look for a common dominator
BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB);
// If none exists, we cannot merge these two points
if (!CommonDominator)
continue;
if (CommonDominator != NewBB) {
// By construction, the CommonDominator cannot be CurBB
assert(CommonDominator != CurBB &&
"Instruction has not been rejected during isDominated check!");
// Take the last instruction of the CommonDominator as insertion point
NewPt = CommonDominator->getTerminator();
}
// else, CommonDominator is the block of NewBB, hence NewBB is the last
// possible insertion point in that block
DEBUG(dbgs() << "Merge insertion point with:\n");
DEBUG(IPI->first->print(dbgs()));
DEBUG(dbgs() << '\n');
DEBUG(NewPt->print(dbgs()));
DEBUG(dbgs() << '\n');
appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
return true;
}
return false;
}
void ARM64PromoteConstant::computeInsertionPoints(
Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
DEBUG(dbgs() << "** Compute insertion points **\n");
for (Value::user_iterator UseIt = Val->user_begin(),
EndUseIt = Val->user_end();
UseIt != EndUseIt; ++UseIt) {
// If the user is not an Instruction, we cannot modify it
if (!isa<Instruction>(*UseIt))
continue;
// Filter out uses that should not be converted
if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
continue;
DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
DEBUG((*UseIt)->print(dbgs()));
DEBUG(dbgs() << '\n');
Instruction *InsertionPoint = findInsertionPoint(UseIt);
DEBUG(dbgs() << "Considered insertion point:\n");
DEBUG(InsertionPoint->print(dbgs()));
DEBUG(dbgs() << '\n');
// Check if the current insertion point is useless, i.e., it is dominated
// by another one.
InsertionPoints &InsertPts =
InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
if (isDominated(InsertionPoint, UseIt, InsertPts))
continue;
// This insertion point is useful, check if we can merge some insertion
// point in a common dominator or if NewPt dominates an existing one.
if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
continue;
DEBUG(dbgs() << "Keep considered insertion point\n");
// It is definitely useful on its own.
InsertPts[InsertionPoint].push_back(UseIt);
}
}
bool
ARM64PromoteConstant::insertDefinitions(Constant *Cst,
InsertionPointsPerFunc &InsPtsPerFunc) {
// We will create one global variable per Module
DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
bool HasChanged = false;
// Traverse all insertion points in all the functions.
for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
EndIt = InsPtsPerFunc.end();
FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
InsertionPoints &InsertPts = FctToInstPtsIt->second;
// Do more checks for debug purposes
#ifndef NDEBUG
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*FctToInstPtsIt->first).getDomTree();
#endif
GlobalVariable *PromotedGV;
assert(!InsertPts.empty() && "An empty use list does not need a definition");
Module *M = FctToInstPtsIt->first->getParent();
DenseMap<Module *, GlobalVariable *>::iterator MapIt =
ModuleToMergedGV.find(M);
if (MapIt == ModuleToMergedGV.end()) {
PromotedGV = new GlobalVariable(
*M, Cst->getType(), true, GlobalValue::InternalLinkage, 0,
"_PromotedConst", 0, GlobalVariable::NotThreadLocal);
PromotedGV->setInitializer(Cst);
ModuleToMergedGV[M] = PromotedGV;
DEBUG(dbgs() << "Global replacement: ");
DEBUG(PromotedGV->print(dbgs()));
DEBUG(dbgs() << '\n');
++NumPromoted;
HasChanged = true;
} else {
PromotedGV = MapIt->second;
}
for (InsertionPoints::iterator IPI = InsertPts.begin(),
EndIPI = InsertPts.end();
IPI != EndIPI; ++IPI) {
// Create the load of the global variable
IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
DEBUG(dbgs() << "**********\n");
DEBUG(dbgs() << "New def: ");
DEBUG(LoadedCst->print(dbgs()));
DEBUG(dbgs() << '\n');
// Update the dominated uses
Users &DominatedUsers = IPI->second;
for (Users::iterator UseIt = DominatedUsers.begin(),
EndIt = DominatedUsers.end();
UseIt != EndIt; ++UseIt) {
#ifndef NDEBUG
assert((DT.dominates(LoadedCst, cast<Instruction>(**UseIt)) ||
(isa<PHINode>(**UseIt) &&
DT.dominates(LoadedCst, findInsertionPoint(*UseIt)))) &&
"Inserted definition does not dominate all its uses!");
#endif
DEBUG(dbgs() << "Use to update " << UseIt->getOperandNo() << ":");
DEBUG((*UseIt)->print(dbgs()));
DEBUG(dbgs() << '\n');
(*UseIt)->setOperand(UseIt->getOperandNo(), LoadedCst);
++NumPromotedUses;
}
}
}
return HasChanged;
}
bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
InsertionPointsPerFunc InsertPtsPerFunc;
computeInsertionPoints(Val, InsertPtsPerFunc);
return insertDefinitions(Val, InsertPtsPerFunc);
}
bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
assert(Cst && "Given variable is not a valid constant.");
if (!shouldConvert(Cst))
return false;
DEBUG(dbgs() << "******************************\n");
DEBUG(dbgs() << "Candidate constant: ");
DEBUG(Cst->print(dbgs()));
DEBUG(dbgs() << '\n');
return computeAndInsertDefinitions(Cst);
}
bool ARM64PromoteConstant::runOnFunction(Function &F) {
// Look for instructions using constant vectors and promote those constants
// to global variables.
// Create as few loads of each such variable as possible and update the uses
// accordingly.
bool LocalChange = false;
SmallSet<Constant *, 8> AlreadyChecked;
for (Function::iterator IBB = F.begin(), IEndBB = F.end(); IBB != IEndBB;
++IBB) {
for (BasicBlock::iterator II = IBB->begin(), IEndI = IBB->end();
II != IEndI; ++II) {
// Traverse the operands, looking for constant vectors.
// Replace them with a load of a global variable of constant vector type.
for (unsigned OpIdx = 0, EndOpIdx = II->getNumOperands();
OpIdx != EndOpIdx; ++OpIdx) {
Constant *Cst = dyn_cast<Constant>(II->getOperand(OpIdx));
// There is no point in promoting global values; they are already global.
// Do not promote constant expressions, as they may require some code
// expansion.
if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
AlreadyChecked.insert(Cst))
LocalChange |= promoteConstant(Cst);
}
}
}
return LocalChange;
}

View File

@ -0,0 +1,402 @@
//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM64 implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "ARM64RegisterInfo.h"
#include "ARM64FrameLowering.h"
#include "ARM64InstrInfo.h"
#include "ARM64Subtarget.h"
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
#define GET_REGINFO_TARGET_DESC
#include "ARM64GenRegisterInfo.inc"
using namespace llvm;
ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii,
const ARM64Subtarget *sti)
: ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {}
const uint16_t *
ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
return CSR_ARM64_AllRegs_SaveList;
else
return CSR_ARM64_AAPCS_SaveList;
}
const uint32_t *
ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (CC == CallingConv::AnyReg)
return CSR_ARM64_AllRegs_RegMask;
else
return CSR_ARM64_AAPCS_RegMask;
}
const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const {
if (STI->isTargetDarwin())
return CSR_ARM64_TLS_Darwin_RegMask;
assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
return CSR_ARM64_TLS_ELF_RegMask;
}
const uint32_t *
ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
// single i64 return value)
//
// If the calling convention does not use the same register for both, this
// function should return NULL (that does not currently apply).
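// e.g., for a call lowered from "call i64 @f(i64 returned %x)" the caller can
// keep relying on x0 still holding %x after the call returns.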
return CSR_ARM64_AAPCS_ThisReturn_RegMask;
}
BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
Reserved.set(ARM64::SP);
Reserved.set(ARM64::XZR);
Reserved.set(ARM64::WSP);
Reserved.set(ARM64::WZR);
if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
Reserved.set(ARM64::FP);
Reserved.set(ARM64::W29);
}
if (STI->isTargetDarwin()) {
Reserved.set(ARM64::X18); // Platform register
Reserved.set(ARM64::W18);
}
if (hasBasePointer(MF)) {
Reserved.set(ARM64::X19);
Reserved.set(ARM64::W19);
}
return Reserved;
}
bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
switch (Reg) {
default:
break;
case ARM64::SP:
case ARM64::XZR:
case ARM64::WSP:
case ARM64::WZR:
return true;
case ARM64::X18:
case ARM64::W18:
return STI->isTargetDarwin();
case ARM64::FP:
case ARM64::W29:
return TFI->hasFP(MF) || STI->isTargetDarwin();
case ARM64::W19:
case ARM64::X19:
return hasBasePointer(MF);
}
return false;
}
const TargetRegisterClass *
ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
return &ARM64::GPR64RegClass;
}
const TargetRegisterClass *
ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &ARM64::CCRRegClass)
return NULL; // Can't copy CPSR.
return RC;
}
unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; }
bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// In the presence of variable sized objects, if the fixed stack size is
// large enough that referencing from the FP won't result in things being
// in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
if (MFI->hasVarSizedObjects()) {
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
// space, so it's all more likely to be within range of the frame pointer.
// If it's wrong, we'll materialize the constant and still get to the
// object; it's just suboptimal. Negative offsets use the unscaled
// load/store instructions, which have a 9-bit signed immediate.
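// e.g., "ldur x0, [x29, #-200]" is directly encodable, whereas an offset of
// -4000 would need the constant materialized into a scratch register first.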
if (MFI->getLocalFrameSize() < 256)
return false;
return true;
}
return false;
}
unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return TFI->hasFP(MF) ? ARM64::FP : ARM64::SP;
}
bool
ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
return true;
}
bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF)
const {
return true;
}
bool
ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// ARM64FrameLowering::resolveFrameIndexReference() can always fall back
// to the stack pointer, so only put the emergency spill slot next to the
// FP when there's no better way to access it (SP or base pointer).
return MFI->hasVarSizedObjects() && !hasBasePointer(MF);
}
bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF)
const {
return true;
}
bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Only consider eliminating leaf frames.
if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) &&
MFI->adjustsStack()))
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
/// needsFrameBaseReg - Returns true if the instruction's frame index
/// reference would be better served by a base register other than FP
/// or SP. Used by LocalStackFrameAllocation to determine which frame index
/// references it should create new base registers for.
bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
int64_t Offset) const {
for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
assert(i < MI->getNumOperands() &&
"Instr doesn't have FrameIndex operand!");
// It's the load/store FI references that cause issues, as it can be difficult
// to materialize the offset if it won't fit in the literal field. Estimate
// based on the size of the local frame and some conservative assumptions
// about the rest of the stack frame (note, this is pre-regalloc, so
// we don't know everything for certain yet) whether this offset is likely
// to be out of range of the immediate. Return true if so.
// We only generate virtual base registers for loads and stores, so
// return false for everything else.
if (!MI->mayLoad() && !MI->mayStore())
return false;
// Without a virtual base register, if the function has variable sized
// objects, all fixed-size local references will be via the frame pointer.
// Approximate the offset and see if it's legal for the instruction.
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
MachineFrameInfo *MFI = MF.getFrameInfo();
// Estimate an offset from the frame pointer.
// Conservatively assume all GPR callee-saved registers get pushed.
// FP, LR, X19-X28, D8-D15. 64-bits each.
int64_t FPOffset = Offset - 16 * 20;
// Estimate an offset from the stack pointer.
// The incoming offset is relating to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
Offset += MFI->getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
// and pick a real one.
Offset += 128; // 128 bytes of spill slots
// If there is a frame pointer, try using it.
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
return false;
// If we can reference via the stack pointer or base pointer, try that.
// FIXME: This (and the code that resolves the references) can be improved
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
if (isFrameOffsetLegal(MI, Offset))
return false;
// The offset likely isn't legal; we want to allocate a virtual base register.
return true;
}
bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
int64_t Offset) const {
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
int SaveOffset = Offset;
return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal;
}
/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
/// at the beginning of the basic block.
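/// In practice this builds a single ADDXri whose frame-index operand is later
/// rewritten into the actual frame register plus offset.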
void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg,
int FrameIdx,
int64_t Offset) const {
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
const MCInstrDesc &MCID = TII->get(ARM64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const MachineFunction &MF = *MBB->getParent();
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx)
.addImm(Offset)
.addImm(Shifter);
}
void ARM64RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
unsigned BaseReg,
int64_t Offset) const {
MachineInstr &MI = *I;
int Off = Offset; // ARM64 doesn't need the general 64-bit offsets
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII);
assert(Done && "Unable to resolve frame index!");
(void)Done;
}
void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const ARM64FrameLowering *TFI = static_cast<const ARM64FrameLowering *>(
MF.getTarget().getFrameLowering());
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned FrameReg;
int Offset;
// Special handling of dbg_value, stackmap and patchpoint instructions.
if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
/*PreferFP=*/true);
Offset += MI.getOperand(FIOperandNum + 1).getImm();
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;
assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
"Emergency spill slot is out of reach");
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
unsigned ScratchReg =
MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
}
namespace llvm {
unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
switch (RC->getID()) {
default:
return 0;
case ARM64::GPR32RegClassID:
case ARM64::GPR32spRegClassID:
case ARM64::GPR32allRegClassID:
case ARM64::GPR64spRegClassID:
case ARM64::GPR64allRegClassID:
case ARM64::GPR64RegClassID:
case ARM64::GPR32commonRegClassID:
case ARM64::GPR64commonRegClassID:
return 32 - 1 // XZR/SP
- (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- STI->isTargetDarwin() // X18 reserved as platform register
- hasBasePointer(MF); // X19
case ARM64::FPR8RegClassID:
case ARM64::FPR16RegClassID:
case ARM64::FPR32RegClassID:
case ARM64::FPR64RegClassID:
case ARM64::FPR128RegClassID:
return 32;
case ARM64::DDRegClassID:
case ARM64::DDDRegClassID:
case ARM64::DDDDRegClassID:
case ARM64::QQRegClassID:
case ARM64::QQQRegClassID:
case ARM64::QQQQRegClassID:
return 32;
case ARM64::FPR128_loRegClassID:
return 16;
}
}
} // namespace llvm

View File

@ -0,0 +1,89 @@
//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM64 implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_ARM64REGISTERINFO_H
#define LLVM_TARGET_ARM64REGISTERINFO_H
#define GET_REGINFO_HEADER
#include "ARM64GenRegisterInfo.inc"
namespace llvm {
class ARM64InstrInfo;
class ARM64Subtarget;
class MachineFunction;
class RegScavenger;
class TargetRegisterClass;
struct ARM64RegisterInfo : public ARM64GenRegisterInfo {
private:
const ARM64InstrInfo *TII;
const ARM64Subtarget *STI;
public:
ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti);
/// Code Generation virtual methods...
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
// Calls involved in thread-local variable lookup save more registers than
// normal calls, so they need a different mask to represent this.
const uint32_t *getTLSCallPreservedMask() const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i64 first argument if the calling convention
/// is one that can (partially) model this attribute with a preserved mask
/// (i.e. it is a calling convention that uses the same register for the first
/// i64 argument and an i64 return value)
///
/// Should return NULL in the case that the calling convention does not have
/// this property
const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
unsigned Kind = 0) const;
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const;
bool requiresRegisterScavenging(const MachineFunction &MF) const;
bool useFPForScavengingIndex(const MachineFunction &MF) const;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
int FrameIdx, int64_t Offset) const;
void resolveFrameIndex(MachineBasicBlock::iterator I, unsigned BaseReg,
int64_t Offset) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
bool cannotEliminateFrame(const MachineFunction &MF) const;
bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
unsigned getBaseRegister() const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
};
} // end namespace llvm
#endif // LLVM_TARGET_ARM64REGISTERINFO_H

View File

@ -0,0 +1,561 @@
//===- ARM64RegisterInfo.td - Describe the ARM64 Registers -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
class ARM64Reg<bits<16> enc, string n, list<Register> subregs = [],
list<string> altNames = []>
: Register<n, altNames> {
let HWEncoding = enc;
let Namespace = "ARM64";
let SubRegs = subregs;
}
let Namespace = "ARM64" in {
def sub_32 : SubRegIndex<32>;
def bsub : SubRegIndex<8>;
def hsub : SubRegIndex<16>;
def ssub : SubRegIndex<32>;
def dsub : SubRegIndex<32>;
def qhisub : SubRegIndex<64>;
def qsub : SubRegIndex<64>;
// Note: Code depends on these having consecutive numbers
def dsub0 : SubRegIndex<64>;
def dsub1 : SubRegIndex<64>;
def dsub2 : SubRegIndex<64>;
def dsub3 : SubRegIndex<64>;
// Note: Code depends on these having consecutive numbers
def qsub0 : SubRegIndex<128>;
def qsub1 : SubRegIndex<128>;
def qsub2 : SubRegIndex<128>;
def qsub3 : SubRegIndex<128>;
}
let Namespace = "ARM64" in {
def vreg : RegAltNameIndex;
def vlist1 : RegAltNameIndex;
}
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>;
def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>;
def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>;
def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>;
def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>;
def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>;
def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>;
def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>;
def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>;
def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>;
def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>;
def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>;
def W12 : ARM64Reg<12, "w12">, DwarfRegNum<[12]>;
def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>;
def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>;
def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>;
def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>;
def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>;
def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>;
def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>;
def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>;
def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>;
def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>;
def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>;
def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>;
def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>;
def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>;
def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>;
def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>;
def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>;
def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>;
def WSP : ARM64Reg<31, "wsp">, DwarfRegNum<[31]>;
def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias<WSP>;
let SubRegIndices = [sub_32] in {
def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>;
def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>;
def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>;
def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>;
def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>;
def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>;
def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>;
def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>;
def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>;
def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>;
def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>;
def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>;
def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>;
def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>;
def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>;
def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>;
def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>;
def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>;
def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>;
def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>;
def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>;
def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>;
def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>;
def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>;
def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>;
def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>;
def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>;
def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>;
def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>;
def FP : ARM64Reg<29, "fp", [W29]>, DwarfRegAlias<W29>;
def LR : ARM64Reg<30, "lr", [W30]>, DwarfRegAlias<W30>;
def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>;
def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>;
}
// Condition code register.
def CPSR : ARM64Reg<0, "cpsr">;
// GPR register classes with the intersections of GPR32/GPR32sp and
// GPR64/GPR64sp for use by the coalescer.
def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> {
let AltOrders = [(rotl GPR32common, 8)];
let AltOrderSelect = [{ return 1; }];
}
def GPR64common : RegisterClass<"ARM64", [i64], 64,
(add (sequence "X%u", 0, 28), FP, LR)> {
let AltOrders = [(rotl GPR64common, 8)];
let AltOrderSelect = [{ return 1; }];
}
// GPR register classes which exclude SP/WSP.
def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> {
let AltOrders = [(rotl GPR32, 8)];
let AltOrderSelect = [{ return 1; }];
}
def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> {
let AltOrders = [(rotl GPR64, 8)];
let AltOrderSelect = [{ return 1; }];
}
// GPR register classes which include SP/WSP.
def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> {
let AltOrders = [(rotl GPR32sp, 8)];
let AltOrderSelect = [{ return 1; }];
}
def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> {
let AltOrders = [(rotl GPR64sp, 8)];
let AltOrderSelect = [{ return 1; }];
}
// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
// constraint used by any instruction; it is only used as a common super-class.
def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>;
def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>;
// For tail calls, we can't use callee-saved registers, as they are restored
// to the saved value before the tail call, which would clobber a call address.
// This is for indirect tail calls to store the address of the destination.
def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21,
X22, X23, X24, X25, X26,
X27, X28)>;
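// For example, an indirect tail call emitted as "br x16" must take its target
// from one of these registers: a callee-saved register such as x19 would be
// reloaded with its saved value in the epilogue, clobbering the call address.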
// GPR register classes for the post-increment amount of vector load/store
// instructions. These have alternate printing when Rm=31: a constant immediate
// value equal to the total number of bytes transferred is printed instead.
def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand1">;
def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand2">;
def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand3">;
def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand4">;
def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand6">;
def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand8">;
def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand12">;
def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand16">;
def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand24">;
def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand32">;
def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand48">;
def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand64">;
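// For example (illustrative): with a register post-increment the instruction
// prints as "ld1 { v0.16b }, [x0], x2", but when Rm is encoded as 31 the
// increment prints as "#16", the 16 bytes transferred, which is the form
// GPR64pi16 models.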
// Condition code regclass.
def CCR : RegisterClass<"ARM64", [i32], 32, (add CPSR)> {
let CopyCost = -1; // Don't allow copying of status registers.
// CCR is not allocatable.
let isAllocatable = 0;
}
//===----------------------------------------------------------------------===//
// Floating Point Scalar Registers
//===----------------------------------------------------------------------===//
def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>;
def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>;
def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>;
def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>;
def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>;
def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>;
def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>;
def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>;
def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>;
def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>;
def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>;
def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>;
def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>;
def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>;
def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>;
def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>;
def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>;
def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>;
def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>;
def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>;
def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>;
def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>;
def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>;
def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>;
def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>;
def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>;
def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>;
def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>;
def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>;
def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>;
def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>;
def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>;
let SubRegIndices = [bsub] in {
def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>;
def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>;
def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>;
def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>;
def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>;
def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>;
def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>;
def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>;
def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>;
def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>;
def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>;
def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>;
def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>;
def H13 : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>;
def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>;
def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>;
def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>;
def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>;
def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>;
def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>;
def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>;
def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>;
def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>;
def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>;
def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>;
def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>;
def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>;
def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>;
def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>;
def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>;
def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>;
def H31 : ARM64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>;
}
let SubRegIndices = [hsub] in {
def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>;
def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>;
def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>;
def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>;
def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>;
def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>;
def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>;
def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>;
def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>;
def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>;
def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>;
def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>;
def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>;
def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>;
def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>;
def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>;
def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>;
def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>;
def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>;
def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>;
def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>;
def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>;
def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>;
def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>;
def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>;
def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>;
def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>;
def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>;
def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>;
def S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>;
def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>;
def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>;
}
let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in {
def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>;
def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>;
def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>;
def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>;
def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>;
def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>;
def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>;
def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>;
def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>;
def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>;
def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>;
def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>;
def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>;
def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>;
def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>;
def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>;
def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>;
def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>;
def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>;
def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>;
def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>;
def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>;
def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>;
def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>;
def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>;
def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>;
def D26 : ARM64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>;
def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>;
def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>;
def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>;
def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>;
def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>;
}
let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in {
def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>;
def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>;
def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>;
def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>;
def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>;
def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>;
def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>;
def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>;
def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>;
def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>;
def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>;
def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>;
def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>;
def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>;
def Q14 : ARM64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias<B14>;
def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>;
def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>;
def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>;
def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>;
def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>;
def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>;
def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>;
def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>;
def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>;
def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>;
def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>;
def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>;
def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>;
def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>;
def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>;
def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
}
def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> {
let Size = 8;
}
def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> {
let Size = 16;
}
def FPR32 : RegisterClass<"ARM64", [f32, i32], 32, (sequence "S%u", 0, 31)>;
def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
v1i64],
64, (sequence "D%u", 0, 31)>;
// We don't (yet) have an f128 legal type, so don't use that here. We
// normalize 128-bit vectors to v2f64 for arg passing and such, so use
// that here.
def FPR128 : RegisterClass<"ARM64",
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
128, (sequence "Q%u", 0, 31)>;
// The lower 16 vector registers. Some instructions can only take registers
// in this range.
def FPR128_lo : RegisterClass<"ARM64",
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
128, (trunc FPR128, 16)>;
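// For example, indexed (by-element) operations on 16-bit elements, such as
// "mla v0.8h, v1.8h, v2.h[3]", only have a 4-bit field for the indexed
// register, so that operand is restricted to V0-V15 via FPR128_lo.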
// Pairs, triples, and quads of 64-bit vector registers.
def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
[(rotl FPR64, 0), (rotl FPR64, 1),
(rotl FPR64, 2)]>;
def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3],
[(rotl FPR64, 0), (rotl FPR64, 1),
(rotl FPR64, 2), (rotl FPR64, 3)]>;
def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> {
let Size = 128;
}
def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> {
let Size = 192;
}
def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> {
let Size = 256;
}
// Pairs, triples, and quads of 128-bit vector registers.
def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>;
def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2],
[(rotl FPR128, 0), (rotl FPR128, 1),
(rotl FPR128, 2)]>;
def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3],
[(rotl FPR128, 0), (rotl FPR128, 1),
(rotl FPR128, 2), (rotl FPR128, 3)]>;
def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> {
let Size = 256;
}
def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> {
let Size = 384;
}
def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> {
let Size = 512;
}
// Vector operand versions of the FP registers. Alternate name printing and
// assembler matching.
def VectorRegAsmOperand : AsmOperandClass { let Name = "VectorReg"; }
let ParserMatchClass = VectorRegAsmOperand in {
def V64 : RegisterOperand<FPR64, "printVRegOperand">;
def V128 : RegisterOperand<FPR128, "printVRegOperand">;
def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand">;
}
class TypedVecListAsmOperand<int count, int regsize, int lanes, string kind>
: AsmOperandClass {
let Name = "TypedVectorList" # count # "_" # lanes # kind;
let PredicateMethod
= "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>";
let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">";
}
class TypedVecListRegOperand<RegisterClass Reg, int lanes, string kind>
: RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '"
# kind # "'>">;
multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> {
// With implicit types (probably on instruction instead). E.g. { v0, v1 }
def _64AsmOperand : AsmOperandClass {
let Name = NAME # "64";
let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
let RenderMethod = "addVectorList64Operands<" # count # ">";
}
def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand");
}
def _128AsmOperand : AsmOperandClass {
let Name = NAME # "128";
let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
let RenderMethod = "addVectorList128Operands<" # count # ">";
}
def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand");
}
// 64-bit register lists with explicit type.
// { v0.8b, v1.8b }
def _8bAsmOperand : TypedVecListAsmOperand<count, 64, 8, "b">;
def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand");
}
// { v0.4h, v1.4h }
def _4hAsmOperand : TypedVecListAsmOperand<count, 64, 4, "h">;
def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand");
}
// { v0.2s, v1.2s }
def _2sAsmOperand : TypedVecListAsmOperand<count, 64, 2, "s">;
def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand");
}
// { v0.1d, v1.1d }
def _1dAsmOperand : TypedVecListAsmOperand<count, 64, 1, "d">;
def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand");
}
// 128-bit register lists with explicit type
// { v0.16b, v1.16b }
def _16bAsmOperand : TypedVecListAsmOperand<count, 128, 16, "b">;
def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand");
}
// { v0.8h, v1.8h }
def _8hAsmOperand : TypedVecListAsmOperand<count, 128, 8, "h">;
def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand");
}
// { v0.4s, v1.4s }
def _4sAsmOperand : TypedVecListAsmOperand<count, 128, 4, "s">;
def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand");
}
// { v0.2d, v1.2d }
def _2dAsmOperand : TypedVecListAsmOperand<count, 128, 2, "d">;
def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2dAsmOperand");
}
// { v0.b, v1.b }
def _bAsmOperand : TypedVecListAsmOperand<count, 128, 0, "b">;
def "b" : TypedVecListRegOperand<Reg128, 0, "b"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand");
}
// { v0.h, v1.h }
def _hAsmOperand : TypedVecListAsmOperand<count, 128, 0, "h">;
def "h" : TypedVecListRegOperand<Reg128, 0, "h"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand");
}
// { v0.s, v1.s }
def _sAsmOperand : TypedVecListAsmOperand<count, 128, 0, "s">;
def "s" : TypedVecListRegOperand<Reg128, 0, "s"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand");
}
// { v0.d, v1.d }
def _dAsmOperand : TypedVecListAsmOperand<count, 128, 0, "d">;
def "d" : TypedVecListRegOperand<Reg128, 0, "d"> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand");
}
}
defm VecListOne : VectorList<1, FPR64, FPR128>;
defm VecListTwo : VectorList<2, DD, QQ>;
defm VecListThree : VectorList<3, DDD, QQQ>;
defm VecListFour : VectorList<4, DDDD, QQQQ>;
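// Illustrative use: the register list in "ld4 { v0.4s, v1.4s, v2.4s, v3.4s },
// [x0]" is parsed and printed through the corresponding VecListFour operand
// above, while the untyped "{ v0, v1 }" syntax goes through the implicitly
// typed 64/128-bit variants.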
// Register operand versions of the scalar FP registers.
def FPR16Op : RegisterOperand<FPR16, "printOperand">;
def FPR32Op : RegisterOperand<FPR32, "printOperand">;
def FPR64Op : RegisterOperand<FPR64, "printOperand">;
def FPR128Op : RegisterOperand<FPR128, "printOperand">;


@@ -0,0 +1,852 @@
//=- ARM64SchedCyclone.td - ARM64 Cyclone Scheduling Defs ----*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for ARM64 Cyclone to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def CycloneModel : SchedMachineModel {
let IssueWidth = 6; // 6 micro-ops are dispatched per cycle.
let MicroOpBufferSize = 192; // Based on the reorder buffer.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 16; // 14-19 cycles are typical.
}
//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Cyclone.
// 4 integer pipes
def CyUnitI : ProcResource<4> {
let BufferSize = 48;
}
// 2 branch units: I[0..1]
def CyUnitB : ProcResource<2> {
let Super = CyUnitI;
let BufferSize = 24;
}
// 1 indirect-branch unit: I[0]
def CyUnitBR : ProcResource<1> {
let Super = CyUnitB;
}
// 2 shifter pipes: I[2..3]
// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI
def CyUnitIS : ProcResource<2> {
let Super = CyUnitI;
let BufferSize = 24;
}
// 1 mul pipe: I[0]
def CyUnitIM : ProcResource<1> {
let Super = CyUnitBR;
let BufferSize = 32;
}
// 1 div pipe: I[1]
def CyUnitID : ProcResource<1> {
let Super = CyUnitB;
let BufferSize = 16;
}
// 1 integer division unit. This is driven by the ID pipe, but only
// consumes the pipe for one cycle at issue and another cycle at writeback.
def CyUnitIntDiv : ProcResource<1>;
// 2 ld/st pipes.
def CyUnitLS : ProcResource<2> {
let BufferSize = 28;
}
// 3 fp/vector pipes.
def CyUnitV : ProcResource<3> {
let BufferSize = 48;
}
// 2 fp/vector arithmetic and multiply pipes: V[0-1]
def CyUnitVM : ProcResource<2> {
let Super = CyUnitV;
let BufferSize = 32;
}
// 1 fp/vector division/sqrt pipe: V[2]
def CyUnitVD : ProcResource<1> {
let Super = CyUnitV;
let BufferSize = 16;
}
// 1 fp compare pipe: V[0]
def CyUnitVC : ProcResource<1> {
let Super = CyUnitVM;
let BufferSize = 16;
}
// 2 fp division/square-root units. These are driven by the VD pipe,
// but only consume the pipe for one cycle at issue and a cycle at writeback.
def CyUnitFloatDiv : ProcResource<2>;
//===----------------------------------------------------------------------===//
// Define scheduler read/write resources and latency on Cyclone.
// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1.
let SchedModel = CycloneModel in {
//---
// 7.8.1. Moves
//---
// A single nop micro-op (uX).
def WriteX : SchedWriteRes<[]> { let Latency = 0; }
// Move zero is a register rename (to machine register zero).
// The move is replaced by a single nop micro-op.
// MOVZ Rd, #0
// AND Rd, Rzr, #imm
def WriteZPred : SchedPredicate<[{TII->isGPRZero(MI)}]>;
def WriteImmZ : SchedWriteVariant<[
SchedVar<WriteZPred, [WriteX]>,
SchedVar<NoSchedPred, [WriteImm]>]>;
def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>;
// Move GPR is a register rename and single nop micro-op.
// ORR Xd, XZR, Xm
// ADD Xd, Xn, #0
def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(MI)}]>;
def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(MI)}]>;
def WriteMov : SchedWriteVariant<[
SchedVar<WriteIMovPred, [WriteX]>,
SchedVar<WriteVMovPred, [WriteX]>,
SchedVar<NoSchedPred, [WriteI]>]>;
def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>;
// Move non-zero immediate is an integer ALU op.
// MOVN,MOVZ,MOVK
def : WriteRes<WriteImm, [CyUnitI]>;
//---
// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional,
// Shifts and Bitfield Operations
//---
// ADR,ADRP
// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri
// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr
// ADC(S),SBC(S)
// Aliases: CMN, CMP, TST
//
// Conditional operations.
// CCMNi,CCMPi,CCMNr,CCMPr,
// CSEL,CSINC,CSINV,CSNEG
//
// Bit counting and reversal operations.
// CLS,CLZ,RBIT,REV,REV16,REV32
def : WriteRes<WriteI, [CyUnitI]>;
// ADD with shifted register operand is a single micro-op that
// consumes a shift pipeline for two cycles.
// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs
// EXAMPLE: ADDrs Xn, Xm LSL #imm
def : WriteRes<WriteISReg, [CyUnitIS]> {
let Latency = 2;
let ResourceCycles = [2];
}
// ADD with extended register operand is the same as shifted reg operand.
// ADD(S)re,SUB(S)re
// EXAMPLE: ADDXre Xn, Xm, UXTB #1
def : WriteRes<WriteIEReg, [CyUnitIS]> {
let Latency = 2;
let ResourceCycles = [2];
}
// Variable shift and bitfield operations.
// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM
def : WriteRes<WriteIS, [CyUnitIS]>;
// EXTR shifts a pair of registers and requires two micro-ops.
// The second micro-op is delayed, as modeled by ReadExtrHi.
// EXTR Xn, Xm, #imm
def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> {
let Latency = 2;
let NumMicroOps = 2;
}
// EXTR's first register read is delayed by one cycle, effectively
// shortening its writer's latency.
// EXTR Xn, Xm, #imm
def : ReadAdvance<ReadExtrHi, 1>;
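// Illustrative effect: if the producer of the ReadExtrHi operand has latency
// 2, an immediately following EXTR observes an effective latency of 1 on that
// operand because of the 1-cycle ReadAdvance.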
//---
// 7.8.6. Multiplies
//---
// MUL/MNEG are aliases for MADD/MSUB.
// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL
def : WriteRes<WriteIM32, [CyUnitIM]> {
let Latency = 4;
}
// MADDX,MSUBX,SMULH,UMULH
def : WriteRes<WriteIM64, [CyUnitIM]> {
let Latency = 5;
}
//---
// 7.8.7. Divide
//---
// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient.
// The ID pipe is consumed for 2 cycles: issue and writeback.
// SDIVW,UDIVW
def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> {
let Latency = 10;
let ResourceCycles = [2, 10];
}
// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient.
// The ID pipe is consumed for 2 cycles: issue and writeback.
// SDIVX,UDIVX
def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> {
let Latency = 13;
let ResourceCycles = [2, 13];
}
//---
// 7.8.8,7.8.10. Load/Store, single element
//---
// Integer loads take 4 cycles and use one LS unit for one cycle.
def : WriteRes<WriteLD, [CyUnitLS]> {
let Latency = 4;
}
// Store-load forwarding is 4 cycles.
//
// Note: The store-exclusive sequence incorporates this
// latency. However, general heuristics should not model the
// dependence between a store and subsequent may-alias load because
// hardware speculation works.
def : WriteRes<WriteST, [CyUnitLS]> {
let Latency = 4;
}
// Load from base address plus an optionally scaled register offset.
// Rt latency is the combined WriteIS + WriteLD latency.
// EXAMPLE: LDR Xn, Xm [, lsl 3]
def CyWriteLDIdx : SchedWriteVariant<[
SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register.
SchedVar<NoSchedPred, [WriteLD]>]>; // Load from register offset.
def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map ARM64->Cyclone type.
// EXAMPLE: STR Xn, Xm [, lsl 3]
def CyWriteSTIdx : SchedWriteVariant<[
SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register.
SchedVar<NoSchedPred, [WriteST]>]>; // Store to register offset.
def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map ARM64->Cyclone type.
// Read the (unshifted) base register Xn in the second micro-op one cycle later.
// EXAMPLE: LDR Xn, Xm [, lsl 3]
def ReadBaseRS : SchedReadAdvance<1>;
def CyReadAdrBase : SchedReadVariant<[
SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map ARM64->Cyclone type.
//---
// 7.8.9,7.8.11. Load/Store, paired
//---
// Address pre/post increment is a simple ALU op with one cycle latency.
def : WriteRes<WriteAdr, [CyUnitI]>;
// LDP high register write is fused with the load, but a nop micro-op remains.
def : WriteRes<WriteLDHi, []> {
let Latency = 4;
}
// STP is a vector op and store, except for STPQi, which is just two stores.
def : SchedAlias<WriteSTP, WriteVSTShuffle>;
def : InstRW<[WriteST, WriteST], (instrs STPQi)>;
//---
// 7.8.13. Branches
//---
// Branches take a single micro-op.
// The misprediction penalty is defined as a SchedMachineModel property.
def : WriteRes<WriteBr, [CyUnitB]> {let Latency = 0;}
def : WriteRes<WriteBrReg, [CyUnitBR]> {let Latency = 0;}
//---
// 7.8.14. Never-issued Instructions, Barrier and Hint Operations
//---
// NOP,SEV,SEVL,WFE,WFI,YIELD
def : WriteRes<WriteHint, []> {let Latency = 0;}
// ISB
def : InstRW<[WriteI], (instrs ISB)>;
// CLREX,DMB,DSB
def : WriteRes<WriteBarrier, [CyUnitLS]>;
// System instructions get an invalid latency because the latency of
// other operations across them is meaningless.
def : WriteRes<WriteSys, []> {let Latency = -1;}
//===----------------------------------------------------------------------===//
// 7.9 Vector Unit Instructions
// Simple vector operations take 2 cycles.
def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
// Define some longer latency vector op types for Cyclone.
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;}
def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;}
def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;}
// Simple floating-point operations take 2 cycles.
def : WriteRes<WriteF, [CyUnitV]> {let Latency = 2;}
//---
// 7.9.1 Vector Moves
//---
// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently
// generates expensive int-float conversion instead:
// FMOVDi Dd, #0.0
// FMOVv2f64ns Vd.2d, #0.0
// FMOVSi,FMOVDi
def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
// MOVI,MVNI are WriteV
// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV
// Move FPR is a register rename and single nop micro-op.
// ORR.16b Vd,Vn,Vn
// COPY is handled above in the WriteMov Variant.
def WriteVMov : SchedWriteVariant<[
SchedVar<WriteVMovPred, [WriteX]>,
SchedVar<NoSchedPred, [WriteV]>]>;
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
// FMOVSr,FMOVDr are WriteF.
// MOV V,V is a WriteV.
// CPY D,V[x] is a WriteV
// INS V[x],V[y] is a WriteV.
// FMOVWSr,FMOVXDr,FMOVXDHighr
def : SchedAlias<WriteFCopy, WriteVLD>;
// FMOVSWr,FMOVDXr
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
// INS V[x],R
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
// SMOV,UMOV R,V[x]
def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>;
def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>;
// DUP V,R
def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>;
// DUP V,V[x] is a WriteV.
//---
// 7.9.2 Integer Arithmetic, Logical, and Comparisons
//---
// BIC,ORR V,#imm are WriteV
def : InstRW<[CyWriteV3], (instregex "ABSv")>;
// MVN,NEG,NOT are WriteV
def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>;
// ADDP is a WriteV.
def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>;
def : InstRW<[CyWriteV3],
(instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>;
def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>;
// ADD,SUB are WriteV
// Forward declare.
def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
// Add/Diff and accumulate uses the vector multiply unit.
def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
def CyReadVAccum : SchedReadAdvance<1,
[CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>;
def : InstRW<[CyWriteVAccum, CyReadVAccum],
(instregex "SADALP","UADALP")>;
def : InstRW<[CyWriteVAccum, CyReadVAccum],
(instregex "SABAv","UABAv","SABALv","UABALv")>;
def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>;
def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>;
def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>;
// WriteV includes:
// AND,BIC,CMTST,EOR,ORN,ORR
// ADDP
// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD
// SADDL,SSUBL,UADDL,USUBL
// SADDW,SSUBW,UADDW,USUBW
def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv",
"CMLEv","CMLTv",
"CMHIv","CMHSv")>;
def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv",
"SMAXPv","SMINPv","UMAXPv","UMINPv")>;
def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv",
"SABDLv","UABDLv")>;
//---
// 7.9.3 Floating Point Arithmetic and Comparisons
//---
// FABS,FNEG are WriteF
def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>;
def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>;
def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i",
"FMINPv2i","FMINNMPv2i")>;
def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>;
def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32,
FSUBSrr,FSUBv2f32,FSUBv4f32,
FADDPv2f32,FADDPv4f32,
FABD32,FABDv2f32,FABDv4f32)>;
def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64,
FSUBDrr,FSUBv2f64,
FADDPv2f64,
FABD64,FABDv2f64)>;
def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>;
def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT",
"FMAXS","FMAXD","FMAXv",
"FMINS","FMIND","FMINv",
"FMAXNMS","FMAXNMD","FMAXNMv",
"FMINNMS","FMINNMD","FMINNMv",
"FMAXPv2f","FMAXPv4f",
"FMINPv2f","FMINPv4f",
"FMAXNMPv2f","FMAXNMPv4f",
"FMINNMPv2f","FMINNMPv4f")>;
// FCMP,FCMPE,FCCMP,FCCMPE
def : WriteRes<WriteFCmp, [CyUnitVC]> {let Latency = 4;}
// FCSEL is a WriteF.
//---
// 7.9.4 Shifts and Bitfield Operations
//---
// SHL is a WriteV
def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>;
def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>;
// Shift and accumulate uses the vector multiply unit.
def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
def CyReadVShiftAcc : SchedReadAdvance<1,
[CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>;
def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc],
(instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
// SSHL,USHL are WriteV.
def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>;
// SQSHL,SQSHLU,UQSHL are WriteV.
def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
// WriteV includes:
// SHLL,SSHLL,USHLL
// SLI,SRI
// BIF,BIT,BSL
// EXT
// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
// XTN2
def : InstRW<[CyWriteV4],
(instregex "RSHRNv","SHRNv",
"SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv",
"UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
//---
// 7.9.5 Multiplication
//---
def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;}
def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv",
"SQDMULLv","SQDMULHv","SQRDMULHv")>;
// FMUL,FMULX,FNMUL default to WriteFMul.
def : WriteRes<WriteFMul, [CyUnitVM]> { let Latency = 4;}
def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;}
def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed,
FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>;
def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>;
def : InstRW<[CyWriteVMul, CyReadVMulAcc],
(instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL",
"SQDMLAL","SQDMLSL")>;
def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;}
def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;}
def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>;
def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>;
def : InstRW<[CyWriteSMul, CyReadSMul],
(instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr,
FMLAv2f32,FMLAv4f32,
FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>;
def : InstRW<[CyWriteDMul, CyReadDMul],
(instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr,
FMLAv2f64,FMLAv2i64_indexed,
FMLSv2f64,FMLSv2i64_indexed)>;
def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; }
def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>;
//---
// 7.9.6 Divide and Square Root
//---
// FDIV,FSQRT
// TODO: Add 64-bit variant with 19 cycle latency.
// TODO: Specialize FSQRT for longer latency.
def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> {
let Latency = 17;
let ResourceCycles = [2, 17];
}
def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>;
def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; }
def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>;
def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; }
def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; }
def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>;
def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
//---
// 7.9.7 Integer-FP Conversions
//---
// FCVT lengthen f16/s32
def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
// FCVT,FCVTN,FCVTXN
// SCVTF,UCVTF V,V
// FRINT(AIMNPXZ) V,V
def : WriteRes<WriteFCvt, [CyUnitV]> {let Latency = 4;}
// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles.
def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>;
def : InstRW<[CyWriteCvtToFPR], (instregex "[SU]CVTF[SU][WX][SD]r")>;
// FCVT Rd, S/D = V6+LD4: 10 cycles
def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>;
def : InstRW<[CyWriteCvtToGPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>;
// FCVTL is a WriteV
//---
// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup
//---
def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;}
def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr,
AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr,
SHA1SU0rrr)>;
def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;}
def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>;
def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;}
def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr,
SHA256Hrrr,SHA256H2rrr)>;
// TRN,UZP,ZUP are WriteV.
// TBL,TBX are WriteV.
//---
// 7.9.11-7.9.14 Load/Store, single element and paired
//---
// Loading into the vector unit takes 5 cycles vs 4 for integer loads.
def : WriteRes<WriteVLD, [CyUnitLS]> {
let Latency = 5;
}
// Store-load forwarding is 4 cycles.
def : WriteRes<WriteVST, [CyUnitLS]> {
let Latency = 4;
}
// WriteVLDPair/VSTPair sequences are expanded by the target description.
//---
// 7.9.15 Load, element operations
//---
// Only the first WriteVLD and WriteAdr for writeback match def operands.
// Subsequent WriteVLDs consume resources. Since all loaded values have the
// same latency, this is acceptable.
// Vd is read 5 cycles after issuing the vector load.
def : ReadAdvance<ReadVLD, 5>;
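// For example, LD1Twov16b below writes two Q registers with the list
// [WriteVLD, WriteVLD]: the first entry matches the def operands, and the
// second mainly accounts for additional LS-unit usage, which is acceptable
// because both loaded values have the same 5-cycle latency.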
def : InstRW<[WriteVLD],
(instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr],
(instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
// Register writes from the load's high half are fused micro-ops.
def : InstRW<[WriteVLD],
(instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteVLD, WriteAdr],
(instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD],
(instregex "LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
(instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD],
(instregex "LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
(instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD, WriteVLD],
(instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD],
(instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD],
(instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
(instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD],
(instregex "LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD],
(instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD],
(instregex "LD1i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],
(instregex "LD1i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>;
def : InstRW<[WriteVLDShuffle],
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr],
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteVLDShuffle, WriteV],
(instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
(instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
(instregex "LD2i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
(instregex "LD2i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
(instregex "LD2i64$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
(instregex "LD2i64_POST")>;
def : InstRW<[WriteVLDShuffle, WriteV],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
(instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
(instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
(instregex "LD3i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
(instregex "LD3i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
(instregex "LD3i64$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
(instregex "LD3i64_POST")>;
def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
(instrs LD3Rv1d,LD3Rv2d)>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
(instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
(instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle],
(instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle],
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
(instregex "LD4i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
(instregex "LD4i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
(instrs LD4i64)>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
(instrs LD4i64_POST)>;
def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
(instrs LD4Rv1d,LD4Rv2d)>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
//---
// 7.9.16 Store, element operations
//---
// Only the WriteAdr for writeback matches a def operand.
// The subsequent store writes only consume resources.
def : InstRW<[WriteVST],
(instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST],
(instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVSTShuffle],
(instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle],
(instregex "ST1Twov(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVST, WriteVST],
(instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST, WriteVST],
(instregex "ST1Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVST],
(instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST],
(instregex "ST1Threev(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVST, WriteVST, WriteVST],
(instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST],
(instregex "ST1Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST1Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST],
(instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST],
(instregex "ST1Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>;
def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>;
def : InstRW<[WriteVSTShuffle],
(instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle],
(instregex "ST2Twov(8b|4h|2s)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST2Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>;
def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST3Threev(8b|4h|2s)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
(instregex "ST3Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>;
def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>;
def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>;
def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle],
(instregex "ST4Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle],
(instregex "ST4Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle,
WriteVSTPairShuffle, WriteVSTPairShuffle],
(instregex "ST4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle,
WriteVSTPairShuffle, WriteVSTPairShuffle],
(instregex "ST4Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
} // SchedModel = CycloneModel


@@ -0,0 +1,92 @@
//===-- ARM64Schedule.td - ARM64 Scheduling Definitions ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Define TII for use in SchedVariant Predicates.
// const MachineInstr *MI and const TargetSchedModel *SchedModel
// are defined by default.
def : PredicateProlog<[{
const ARM64InstrInfo *TII =
static_cast<const ARM64InstrInfo*>(SchedModel->getInstrInfo());
(void)TII;
}]>;
// ARM64 Scheduler Definitions
def WriteImm : SchedWrite; // MOVN, MOVZ
// TODO: Provide variants for MOV32/64imm Pseudos that dynamically
// select the correct sequence of WriteImms.
def WriteI : SchedWrite; // ALU
def WriteISReg : SchedWrite; // ALU of Shifted-Reg
def WriteIEReg : SchedWrite; // ALU of Extended-Reg
def WriteExtr : SchedWrite; // EXTR shifts a reg pair
def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
def WriteIS : SchedWrite; // Shift/Scale
def WriteID32 : SchedWrite; // 32-bit Divide
def WriteID64 : SchedWrite; // 64-bit Divide
def WriteIM32 : SchedWrite; // 32-bit Multiply
def WriteIM64 : SchedWrite; // 64-bit Multiply
def WriteBr : SchedWrite; // Branch
def WriteBrReg : SchedWrite; // Indirect Branch
def WriteLD : SchedWrite; // Load from base addr plus immediate offset
def WriteST : SchedWrite; // Store to base addr plus immediate offset
def WriteSTP : SchedWrite; // Store a register pair.
def WriteAdr : SchedWrite; // Address pre/post increment.
def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
// ScaledIdxPred is true if a WriteLDIdx operand will be
// scaled. Subtargets can use this to dynamically select resources and
// latency for WriteLDIdx and ReadAdrBase.
def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>;
// Serialized two-level address load.
// EXAMPLE: LOADGot
def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
// Serialized two-level address lookup.
// EXAMPLE: MOVaddr...
def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>;
// The second register of a load-pair.
// LDP,LDPSW,LDNP,LDXP,LDAXP
def WriteLDHi : SchedWrite;
// Store-exclusive is a store followed by a dependent load.
def WriteSTX : WriteSequence<[WriteST, WriteLD]>;
def WriteSys : SchedWrite; // Long, variable latency system ops.
def WriteBarrier : SchedWrite; // Memory barrier.
def WriteHint : SchedWrite; // Hint instruction.
def WriteF : SchedWrite; // General floating-point ops.
def WriteFCmp : SchedWrite; // Floating-point compare.
def WriteFCvt : SchedWrite; // Float conversion.
def WriteFCopy : SchedWrite; // Float-int register copy.
def WriteFImm : SchedWrite; // Floating-point immediate.
def WriteFMul : SchedWrite; // Floating-point multiply.
def WriteFDiv : SchedWrite; // Floating-point division.
def WriteV : SchedWrite; // Vector ops.
def WriteVLD : SchedWrite; // Vector loads.
def WriteVST : SchedWrite; // Vector stores.
// Read the unwritten lanes of the VLD's destination registers.
def ReadVLD : SchedRead;
// Sequential vector load and shuffle.
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
// Store a shuffled vector.
def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
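// Each processor model binds these abstract types to its own resources and
// latencies; for instance, ARM64SchedCyclone.td resolves WriteIM32 to a
// CyUnitIM micro-op with a latency of 4 cycles.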


@@ -0,0 +1,57 @@
//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM64SelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-selectiondag-info"
#include "ARM64TargetMachine.h"
using namespace llvm;
ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM)
: TargetSelectionDAGInfo(TM),
Subtarget(&TM.getSubtarget<ARM64Subtarget>()) {}
ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {}
SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
const char *bzeroEntry =
(V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0;
// For small sizes (<= 256 bytes), it is not beneficial to use bzero
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>(
DAG.getTarget().getTargetLowering());
EVT IntPtr = TLI.getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
Entry.Node = Size;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(
Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
0, CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
return SDValue();
}
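// Illustrative effect (assuming the subtarget reports a bzero entry point,
// e.g. "__bzero" on Darwin targets): a memset of zero whose size is unknown or
// larger than 256 bytes is lowered to a call to that entry, while smaller
// known sizes fall back to the target-independent memset lowering.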


@@ -0,0 +1,38 @@
//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM64 subclass for TargetSelectionDAGInfo.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64SELECTIONDAGINFO_H
#define ARM64SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo {
/// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const ARM64Subtarget *Subtarget;
public:
explicit ARM64SelectionDAGInfo(const TargetMachine &TM);
~ARM64SelectionDAGInfo();
virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const;
};
}
#endif


@@ -0,0 +1,169 @@
//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass identifies floating point stores that should not be combined into
// store pairs. Later we may do the same for floating point loads.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-stp-suppress"
#include "ARM64InstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
class ARM64StorePairSuppress : public MachineFunctionPass {
const ARM64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
MachineFunction *MF;
TargetSchedModel SchedModel;
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
public:
static char ID;
ARM64StorePairSuppress() : MachineFunctionPass(ID) {}
virtual const char *getPassName() const {
return "ARM64 Store Pair Suppression";
}
bool runOnMachineFunction(MachineFunction &F);
private:
bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
bool isNarrowFPStore(const MachineInstr *MI);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
char ARM64StorePairSuppress::ID = 0;
} // anonymous
FunctionPass *llvm::createARM64StorePairSuppressPass() {
return new ARM64StorePairSuppress();
}
/// Return true if an STP can be added to this block without increasing the
/// critical resource height. STP is good to form in Ld/St limited blocks and
/// bad to form in floating-point limited blocks. This is true independent of the
/// critical path. If the critical path is longer than the resource height, the
/// extra vector ops can limit physreg renaming. Otherwise, it could simply
/// oversaturate the vector units.
bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
if (!MinInstr)
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
unsigned ResLength = BBTrace.getResourceLength();
// Get the machine model's scheduling class for STPDi.
// Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass();
const MCSchedClassDesc *SCDesc =
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
// If a subtarget does not define resources for STPDi, bail here.
if (SCDesc->isValid() && !SCDesc->isVariant()) {
unsigned ResLenWithSTP = BBTrace.getResourceLength(
ArrayRef<const MachineBasicBlock *>(), SCDesc);
if (ResLenWithSTP > ResLength) {
DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
<< " resources " << ResLength << " -> " << ResLenWithSTP
<< "\n");
return false;
}
}
return true;
}
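// Worked illustration (editorial, not part of the pass): suppose a block's
// current resource length is 12 cycles. If re-computing the length with one
// extra micro-op of STPDi's scheduling class yields 13, the block is already
// limited by the units an STP would occupy, so this returns false and the
// stores stay unpaired; if the length stays at 12, pairing is allowed.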
/// Return true if this is a floating-point store smaller than the V reg. On
/// Cyclone, these require a vector shuffle before storing a pair.
/// Ideally we would call getMatchingPairOpcode() and have the machine model
/// tell us if it's profitable with no CPU knowledge here.
///
/// FIXME: We plan to develop a decent Target abstraction for simple loads and
/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer.
bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
return false;
case ARM64::STRSui:
case ARM64::STRDui:
case ARM64::STURSi:
case ARM64::STURDi:
return true;
}
}
bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = static_cast<const ARM64InstrInfo *>(MF->getTarget().getInstrInfo());
TRI = MF->getTarget().getRegisterInfo();
MRI = &MF->getRegInfo();
const TargetSubtargetInfo &ST =
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
SchedModel.init(*ST.getSchedModel(), &ST, TII);
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = 0;
DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
if (!SchedModel.hasInstrSchedModel()) {
DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
return false;
}
// Check for a sequence of stores to the same base address. We don't need to
// precisely determine whether a store pair can be formed. But we do want to
// filter out most situations where we can't form store pairs to avoid
// computing trace metrics in those cases.
for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); BI != BE;
++BI) {
bool SuppressSTP = false;
unsigned PrevBaseReg = 0;
for (MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E;
++I) {
if (!isNarrowFPStore(I))
continue;
unsigned BaseReg;
unsigned Offset;
if (TII->getLdStBaseRegImmOfs(I, BaseReg, Offset, TRI)) {
if (PrevBaseReg == BaseReg) {
// If this block can take STPs, skip ahead to the next block.
if (!SuppressSTP && shouldAddSTPToBlock(I->getParent()))
break;
// Otherwise, continue unpairing the stores in this block.
DEBUG(dbgs() << "Unpairing store " << *I << "\n");
SuppressSTP = true;
TII->suppressLdStPair(I);
}
PrevBaseReg = BaseReg;
} else
PrevBaseReg = 0;
}
}
// This pass just sets some internal MachineMemOperand flags. It can't really
// invalidate anything.
return false;
}

View File

@@ -0,0 +1,83 @@
//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#include "ARM64InstrInfo.h"
#include "ARM64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "ARM64GenSubtargetInfo.inc"
using namespace llvm;
ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS)
: ARM64GenSubtargetInfo(TT, CPU, FS), HasZeroCycleRegMove(false),
HasZeroCycleZeroing(false), CPUString(CPU), TargetTriple(TT) {
// Determine default and user-specified characteristics
if (CPUString.empty())
// We default to Cyclone for now.
CPUString = "cyclone";
ParseSubtargetFeatures(CPUString, FS);
}
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned char
ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const {
// Determine whether this is a reference to a definition or a declaration.
// Materializable GVs (in JIT lazy compilation mode) do not require an extra
// load from stub.
bool isDecl = GV->hasAvailableExternallyLinkage();
if (GV->isDeclaration() && !GV->isMaterializable())
isDecl = true;
// If symbol visibility is hidden, the extra load is not needed if
// the symbol is definitely defined in the current translation unit.
if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility() &&
(isDecl || GV->isWeakForLinker()))
return ARM64II::MO_GOT;
if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
return ARM64II::MO_GOT;
// FIXME: this will fail on static ELF for weak symbols.
return ARM64II::MO_NO_FLAG;
}
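// Illustrative only: a hypothetical caller could branch on the returned flag
// roughly like this (emitGOTLoad/emitDirectAddress are placeholder names, not
// in-tree APIs):
//
//   unsigned char Flags = Subtarget->ClassifyGlobalReference(GV, TM);
//   if (Flags == ARM64II::MO_GOT)
//     emitGOTLoad(GV);        // materialize the address via its GOT entry
//   else
//     emitDirectAddress(GV);  // address the symbol directly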
/// This function returns the name of a function which has an interface
/// like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered preferable over
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *ARM64Subtarget::getBZeroEntry() const {
// At the moment, always prefer bzero.
return "bzero";
}
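// Illustrative only: a hypothetical memset lowering could consult this hook
// along these lines (emitLibCall and StoredValueIsZero are placeholders):
//
//   if (StoredValueIsZero)
//     if (const char *BZero = Subtarget->getBZeroEntry())
//       return emitLibCall(BZero, Dst, Size);     // bzero(dst, size)
//   return emitLibCall("memset", Dst, Val, Size); // fall back to memset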
void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin, MachineInstr *end,
unsigned NumRegionInstrs) const {
// An LNT run (at least on Cyclone) showed reasonably significant gains for
// bi-directional scheduling, e.g. on 253.perlbmk.
Policy.OnlyTopDown = false;
Policy.OnlyBottomUp = false;
}

View File

@@ -0,0 +1,87 @@
//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the ARM64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64SUBTARGET_H
#define ARM64SUBTARGET_H
#include "llvm/Target/TargetSubtargetInfo.h"
#include "ARM64RegisterInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "ARM64GenSubtargetInfo.inc"
namespace llvm {
class GlobalValue;
class StringRef;
class ARM64Subtarget : public ARM64GenSubtargetInfo {
protected:
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing;
/// CPUString - String name of used CPU.
std::string CPUString;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
ARM64Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS);
virtual bool enableMachineScheduler() const { return true; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isCyclone() const { return CPUString == "cyclone"; }
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const { return 64; }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned char ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const;
/// This function returns the name of a function which has an interface
/// like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered preferable over
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *getBZeroEntry() const;
void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin,
MachineInstr *end, unsigned NumRegionInstrs) const;
};
} // End llvm namespace
#endif // ARM64SUBTARGET_H

View File

@@ -0,0 +1,157 @@
//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
#include "ARM64.h"
#include "ARM64TargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
static cl::opt<bool> EnableCCMP("arm64-ccmp",
cl::desc("Enable the CCMP formation pass"),
cl::init(true));
static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden,
cl::desc("Suppress STP for ARM64"),
cl::init(true));
static cl::opt<bool>
EnablePromoteConstant("arm64-promote-const", cl::Hidden,
cl::desc("Enable the promote constant pass"),
cl::init(true));
static cl::opt<bool>
EnableCollectLOH("arm64-collect-loh", cl::Hidden,
cl::desc("Enable the pass that emits the linker"
" optimization hints (LOH)"),
cl::init(true));
extern "C" void LLVMInitializeARM64Target() {
// Register the target.
RegisterTargetMachine<ARM64TargetMachine> X(TheARM64Target);
}
/// TargetMachine ctor - Create an ARM64 architecture model.
///
ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DL(Subtarget.isTargetMachO() ? "e-m:o-i64:64-i128:128-n32:64-S128"
: "e-m:e-i64:64-i128:128-n32:64-S128"),
InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
TSInfo(*this) {
initAsmInfo();
}
namespace {
/// ARM64 Code Generator Pass Configuration Options.
class ARM64PassConfig : public TargetPassConfig {
public:
ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
ARM64TargetMachine &getARM64TargetMachine() const {
return getTM<ARM64TargetMachine>();
}
virtual bool addPreISel();
virtual bool addInstSelector();
virtual bool addILPOpts();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // namespace
void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// First add the target-independent BasicTTI pass, then our ARM64 pass. This
// allows the ARM64 pass to delegate to the target independent layer when
// appropriate.
PM.add(createBasicTargetTransformInfoPass(this));
PM.add(createARM64TargetTransformInfoPass(this));
}
TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) {
return new ARM64PassConfig(this, PM);
}
// Pass Pipeline Configuration
bool ARM64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants
// get a chance to be merged.
if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
addPass(createARM64PromoteConstantPass());
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createARM64AddressTypePromotionPass());
return false;
}
bool ARM64PassConfig::addInstSelector() {
addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel()));
// For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
// references to _TLS_MODULE_BASE_ as possible).
if (TM->getSubtarget<ARM64Subtarget>().isTargetELF() &&
getOptLevel() != CodeGenOpt::None)
addPass(createARM64CleanupLocalDynamicTLSPass());
return false;
}
bool ARM64PassConfig::addILPOpts() {
if (EnableCCMP)
addPass(createARM64ConditionalCompares());
addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
addPass(createARM64StorePairSuppressPass());
return true;
}
bool ARM64PassConfig::addPreRegAlloc() {
// Use AdvSIMD scalar instructions whenever profitable.
addPass(createARM64AdvSIMDScalar());
return true;
}
bool ARM64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
addPass(createARM64DeadRegisterDefinitions());
return true;
}
bool ARM64PassConfig::addPreSched2() {
// Expand some pseudo instructions to allow proper scheduling.
addPass(createARM64ExpandPseudoPass());
// Use load/store pair instructions when possible.
addPass(createARM64LoadStoreOptimizationPass());
return true;
}
bool ARM64PassConfig::addPreEmitPass() {
// Relax conditional branch instructions if they're otherwise out of
// range of their destination.
addPass(createARM64BranchRelaxation());
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH)
addPass(createARM64CollectLOHPass());
return true;
}
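// Usage note (editorial): the passes wired up above can be toggled from the
// llc command line via the cl::opt flags declared at the top of this file,
// for example something like:
//   llc -march=arm64 -arm64-ccmp=false -arm64-stp-suppress=false input.ll
// (the exact triple/march spelling is an assumption; the flag names come from
// the cl::opt declarations above).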

View File

@@ -0,0 +1,69 @@
//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the ARM64 specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64TARGETMACHINE_H
#define ARM64TARGETMACHINE_H
#include "ARM64InstrInfo.h"
#include "ARM64ISelLowering.h"
#include "ARM64Subtarget.h"
#include "ARM64FrameLowering.h"
#include "ARM64SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/MC/MCStreamer.h"
namespace llvm {
class ARM64TargetMachine : public LLVMTargetMachine {
protected:
ARM64Subtarget Subtarget;
private:
const DataLayout DL;
ARM64InstrInfo InstrInfo;
ARM64TargetLowering TLInfo;
ARM64FrameLowering FrameLowering;
ARM64SelectionDAGInfo TSInfo;
public:
ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
virtual const ARM64Subtarget *getSubtargetImpl() const { return &Subtarget; }
virtual const ARM64TargetLowering *getTargetLowering() const {
return &TLInfo;
}
virtual const DataLayout *getDataLayout() const { return &DL; }
virtual const ARM64FrameLowering *getFrameLowering() const {
return &FrameLowering;
}
virtual const ARM64InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const ARM64RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
virtual const ARM64SelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
/// \brief Register ARM64 analysis passes with a pass manager.
virtual void addAnalysisPasses(PassManagerBase &PM);
};
} // end namespace llvm
#endif

View File

@@ -0,0 +1,52 @@
//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "ARM64TargetObjectFile.h"
#include "ARM64TargetMachine.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
MCStreamer &Streamer) const {
// On Darwin, we can reference dwarf symbols with foo@GOT-., which
// is an indirect pc-relative reference. The default implementation
// won't reference using the GOT, so we need this target-specific
// version.
if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
MCSymbol *PCSym = getContext().CreateTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
return MCBinaryExpr::CreateSub(Res, PC, getContext());
}
return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
GV, Encoding, Mang, TM, MMI, Streamer);
}
MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol(
const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
return TM.getSymbol(GV, Mang);
}

View File

@@ -0,0 +1,40 @@
//===-- ARM64TargetObjectFile.h - ARM64 Object Info -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
class ARM64TargetMachine;
/// This implementation is used for AArch64 ELF targets (Linux in particular).
class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
};
/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI,
MCStreamer &Streamer) const override;
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
};
} // end namespace llvm
#endif

View File

@@ -0,0 +1,326 @@
//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// ARM64 target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64tti"
#include "ARM64.h"
#include "ARM64TargetMachine.h"
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeARM64TTIPass(PassRegistry &);
}
namespace {
class ARM64TTI final : public ImmutablePass, public TargetTransformInfo {
const ARM64TargetMachine *TM;
const ARM64Subtarget *ST;
const ARM64TargetLowering *TLI;
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the result needs to be inserted and/or extracted from vectors.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
ARM64TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
llvm_unreachable("This pass cannot be directly constructed");
}
ARM64TTI(const ARM64TargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
TLI(TM->getTargetLowering()) {
initializeARM64TTIPass(*PassRegistry::getPassRegistry());
}
void initializePass() override { pushTTIStack(this); }
void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
/// Pass identification.
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo *)this;
return this;
}
/// \name Scalar TTI Implementations
/// @{
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
/// @}
/// \name Vector TTI Implementations
/// @{
unsigned getNumberOfRegisters(bool Vector) const override {
if (Vector)
return 32;
return 31;
}
unsigned getRegisterBitWidth(bool Vector) const override {
if (Vector)
return 128;
return 64;
}
unsigned getMaximumUnrollFactor() const override { return 2; }
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
override;
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
override;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue) const
override;
unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
override;
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;
/// @}
};
} // end anonymous namespace
INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti",
"ARM64 Target Transform Info", true, true, false)
char ARM64TTI::ID = 0;
ImmutablePass *
llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) {
return new ARM64TTI(TM);
}
unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0)
return ~0U;
int64_t Val = Imm.getSExtValue();
if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
return 1;
if ((int64_t)Val < 0)
Val = ~Val;
if (BitSize == 32)
Val &= (1LL << 32) - 1;
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free, so the cost counts the MOVKs: return 1 for one or fewer MOVK.
return (Shift == 0) ? 1 : Shift;
}
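// Worked example (editorial): the 32-bit immediate 0x12345678 is not a valid
// logical immediate and its highest set bit is bit 28, so LZ = 35 and
// Shift = (63 - 35) / 16 = 1. The value is materialized as a free
// MOVZ #0x5678 plus a single MOVK #0x1234, lsl #16, so the returned cost is 1.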
ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
if (TyWidth == 32 || TyWidth == 64)
return PSK_FastHardware;
// TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount.
return PSK_Software;
}
unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
// LowerVectorFP_TO_INT
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
};
int Idx = ConvertCostTableLookup<MVT>(
ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT());
if (Idx != -1)
return ConversionTbl[Idx].Cost;
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}
unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
return 0;
// The type may be split. Normalize the index to the new type.
unsigned Width = LT.second.getVectorNumElements();
Index = Index % Width;
// The element at index zero is already inside the vector.
if (Index == 0)
return 0;
}
// All other insert/extracts cost this much.
return 2;
}
unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind Opd1Info,
OperandValueKind Opd2Info) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
switch (ISD) {
default:
return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
Opd2Info);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
case ISD::OR:
case ISD::AND:
// These nodes are marked as 'custom' for combining purposes only.
// We know that they are legal. See LowerAdd in ISelLowering.
return 1 * LT.first;
}
}
unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
if (Ty->isVectorTy() && IsComplex)
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
// addressing mode.
return 1;
}
unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower vector selects well when they are wider than the register width.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
unsigned AmortizationCost = 20;
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
};
EVT SelCondTy = TLI->getValueType(CondTy);
EVT SelValTy = TLI->getValueType(ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
int Idx =
ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
SelValTy.getSimpleVT());
if (Idx != -1)
return VectorSelectTbl[Idx].Cost;
}
}
return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isIntegerTy(64)) {
// Unaligned stores are extremely inefficient. We don't split
// unaligned v2i64 stores because of the negative impact that has been shown
// in practice on inlined memcpy code.
// We make v2i64 stores expensive so that we will only vectorize if there
// are 6 other instructions getting vectorized.
unsigned AmortizationCost = 6;
return LT.first * 2 * AmortizationCost;
}
if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
Src->getVectorNumElements() < 8) {
// We scalarize the loads/stores because there is no v.4b register and we
// have to promote the elements to v.4h.
unsigned NumVecElts = Src->getVectorNumElements();
unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
// We generate 2 instructions per vector element.
return NumVectorizableInstsToAmortize * NumVecElts * 2;
}
return LT.first;
}
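// Worked example (editorial): storing a legal <2 x i64> vector with alignment
// below 16 takes the first branch; with LT.first == 1 the reported cost is
// 1 * 2 * 6 = 12, which discourages the vectorizers unless enough surrounding
// work is vectorized to amortize the expensive unaligned store.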

File diff suppressed because it is too large.

View File

@@ -0,0 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARM64AsmParser
ARM64AsmParser.cpp
)

View File

@@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = ARM64AsmParser
parent = ARM64
required_libraries = ARM64Desc ARM64Info MC MCParser Support
add_to_library_groups = ARM64

View File

@@ -0,0 +1,15 @@
##===- lib/Target/ARM64/AsmParser/Makefile -----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMARM64AsmParser
# Hack: we need to include 'main' ARM64 target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@@ -0,0 +1,50 @@
set(LLVM_TARGET_DEFINITIONS ARM64.td)
tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel)
tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv)
tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARM64CommonTableGen)
add_llvm_target(ARM64CodeGen
ARM64AddressTypePromotion.cpp
ARM64AdvSIMDScalarPass.cpp
ARM64AsmPrinter.cpp
ARM64BranchRelaxation.cpp
ARM64CleanupLocalDynamicTLSPass.cpp
ARM64CollectLOH.cpp
ARM64ConditionalCompares.cpp
ARM64DeadRegisterDefinitionsPass.cpp
ARM64ExpandPseudoInsts.cpp
ARM64FastISel.cpp
ARM64FrameLowering.cpp
ARM64ISelDAGToDAG.cpp
ARM64ISelLowering.cpp
ARM64InstrInfo.cpp
ARM64LoadStoreOptimizer.cpp
ARM64MCInstLower.cpp
ARM64PromoteConstant.cpp
ARM64RegisterInfo.cpp
ARM64SelectionDAGInfo.cpp
ARM64StorePairSuppress.cpp
ARM64Subtarget.cpp
ARM64TargetMachine.cpp
ARM64TargetObjectFile.cpp
ARM64TargetTransformInfo.cpp
)
add_dependencies(LLVMARM64CodeGen intrinsics_gen)
add_subdirectory(TargetInfo)
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)

File diff suppressed because it is too large.

View File

@@ -0,0 +1,54 @@
//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
#ifndef ARM64DISASSEMBLER_H
#define ARM64DISASSEMBLER_H
#include "llvm/MC/MCDisassembler.h"
namespace llvm {
class MCInst;
class MemoryObject;
class raw_ostream;
class ARM64Disassembler : public MCDisassembler {
public:
ARM64Disassembler(const MCSubtargetInfo &STI) : MCDisassembler(STI) {}
~ARM64Disassembler() {}
/// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
/// operand in place of the immediate Value in the MCInst. The immediate
/// Value has not had any PC adjustment made by the caller. If the instruction
/// adds the PC to the immediate Value then InstsAddsAddressToValue is true,
/// else false. If the getOpInfo() function was set as part of the
/// setupForSymbolicDisassembly() call then that function is called to get any
/// symbolic information at the Address for this instruction. If that returns
/// non-zero then the symbolic information it returns is used to create an
/// MCExpr and that is added as an operand to the MCInst. This function
/// returns true if it adds an operand to the MCInst and false otherwise.
bool tryAddingSymbolicOperand(uint64_t Address, int Value,
bool InstsAddsAddressToValue, uint64_t InstSize,
MCInst &MI, uint32_t insn = 0) const;
};
} // namespace llvm
#endif

View File

@@ -0,0 +1,13 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARM64Disassembler
ARM64Disassembler.cpp
)
# workaround for hanging compilation on MSVC8, 9 and 10
#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
#set_property(
# SOURCE ARMDisassembler.cpp
# PROPERTY COMPILE_FLAGS "/Od"
# )
#endif()
add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen)

View File

@@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = ARM64Disassembler
parent = ARM64
required_libraries = ARM64Desc ARM64Info MC Support
add_to_library_groups = ARM64

View File

@@ -0,0 +1,16 @@
##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMARM64Disassembler
# Hack: we need to include 'main' arm target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

File diff suppressed because it is too large.

View File

@@ -0,0 +1,157 @@
//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class prints an ARM64 MCInst to a .s file.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64INSTPRINTER_H
#define ARM64INSTPRINTER_H
#include "MCTargetDesc/ARM64MCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
class MCOperand;
class ARM64InstPrinter : public MCInstPrinter {
public:
ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
// Autogenerated by tblgen.
virtual void printInstruction(const MCInst *MI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = ARM64::NoRegAltName);
protected:
bool printSysAlias(const MCInst *MI, raw_ostream &O);
// Operand printers
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
raw_ostream &O);
void printPostIncOperand1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand2(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand3(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand4(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand6(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand8(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand12(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand16(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand24(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand32(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand48(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPostIncOperand64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printDotCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAlignedBranchTarget(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
void printAMIndexed128(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
printAMIndexed(MI, OpNum, 16, O);
}
void printAMIndexed64(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
printAMIndexed(MI, OpNum, 8, O);
}
void printAMIndexed32(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
printAMIndexed(MI, OpNum, 4, O);
}
void printAMIndexed16(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
printAMIndexed(MI, OpNum, 2, O);
}
void printAMIndexed8(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
printAMIndexed(MI, OpNum, 1, O);
}
void printAMUnscaled(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
printAMIndexed(MI, OpNum, 1, O);
}
void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmScale4(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmScale8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmScale16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemoryPostIndexed32(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printMemoryPostIndexed64(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printMemoryPostIndexed128(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O,
int LegalShiftAmt);
void printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemoryRegOffset128(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
StringRef LayoutSuffix);
/// Print a list of vector registers where the type suffix is implicit
/// (i.e. attached to the instruction rather than the registers).
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
template <unsigned NumLanes, char LaneKind>
void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printSystemCPSRField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
};
class ARM64AppleInstPrinter : public ARM64InstPrinter {
public:
ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
virtual void printInstruction(const MCInst *MI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = ARM64::NoRegAltName);
};
}
#endif

View File

@@ -0,0 +1,7 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARM64AsmPrinter
ARM64InstPrinter.cpp
)
add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen)

View File

@@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = ARM64AsmPrinter
parent = ARM64
required_libraries = MC Support
add_to_library_groups = ARM64

View File

@@ -0,0 +1,15 @@
##===- lib/Target/ARM64/InstPrinter/Makefile ---------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMARM64AsmPrinter
# Hack: we need to include 'main' arm target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@@ -0,0 +1,36 @@
;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[common]
subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = ARM64
parent = Target
has_asmparser = 1
has_asmprinter = 1
has_disassembler = 1
has_jit = 1
[component_1]
type = Library
name = ARM64CodeGen
parent = ARM64
required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = ARM64

View File

@@ -0,0 +1,759 @@
//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM64 addressing mode implementation stuff.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
namespace llvm {
/// ARM64_AM - ARM64 Addressing Mode Stuff
namespace ARM64_AM {
//===----------------------------------------------------------------------===//
// Shifts
//
enum ShiftType {
InvalidShift = -1,
LSL = 0,
LSR = 1,
ASR = 2,
ROR = 3,
MSL = 4
};
/// getShiftName - Get the string encoding for the shift type.
static inline const char *getShiftName(ARM64_AM::ShiftType ST) {
switch (ST) {
default: assert(false && "unhandled shift type!");
case ARM64_AM::LSL: return "lsl";
case ARM64_AM::LSR: return "lsr";
case ARM64_AM::ASR: return "asr";
case ARM64_AM::ROR: return "ror";
case ARM64_AM::MSL: return "msl";
}
return 0;
}
/// getShiftType - Extract the shift type.
static inline ARM64_AM::ShiftType getShiftType(unsigned Imm) {
return ARM64_AM::ShiftType((Imm >> 6) & 0x7);
}
/// getShiftValue - Extract the shift value.
static inline unsigned getShiftValue(unsigned Imm) {
return Imm & 0x3f;
}
/// getShifterImm - Encode the shift type and amount:
/// imm: 6-bit shift amount
/// shifter: 000 ==> lsl
/// 001 ==> lsr
/// 010 ==> asr
/// 011 ==> ror
/// 100 ==> msl
/// {8-6} = shifter
/// {5-0} = imm
static inline unsigned getShifterImm(ARM64_AM::ShiftType ST, unsigned Imm) {
assert((Imm & 0x3f) == Imm && "Illegal shifted immediate value!");
return (unsigned(ST) << 6) | (Imm & 0x3f);
}
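// Worked example (editorial): getShifterImm(ARM64_AM::LSL, 12) yields
// (0 << 6) | 12 = 12, and getShifterImm(ARM64_AM::ASR, 3) yields
// (2 << 6) | 3 = 0x83; getShiftType()/getShiftValue() invert the packing.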
//===----------------------------------------------------------------------===//
// Extends
//
enum ExtendType {
InvalidExtend = -1,
UXTB = 0,
UXTH = 1,
UXTW = 2,
UXTX = 3,
SXTB = 4,
SXTH = 5,
SXTW = 6,
SXTX = 7
};
/// getExtendName - Get the string encoding for the extend type.
static inline const char *getExtendName(ARM64_AM::ExtendType ET) {
switch (ET) {
default: assert(false && "unhandled extend type!");
case ARM64_AM::UXTB: return "uxtb";
case ARM64_AM::UXTH: return "uxth";
case ARM64_AM::UXTW: return "uxtw";
case ARM64_AM::UXTX: return "uxtx";
case ARM64_AM::SXTB: return "sxtb";
case ARM64_AM::SXTH: return "sxth";
case ARM64_AM::SXTW: return "sxtw";
case ARM64_AM::SXTX: return "sxtx";
}
return 0;
}
/// getArithShiftValue - get the arithmetic shift value.
static inline unsigned getArithShiftValue(unsigned Imm) {
return Imm & 0x7;
}
/// getArithExtendType - Extract the extend type for operands of arithmetic ops.
static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) {
return ARM64_AM::ExtendType((Imm >> 3) & 0x7);
}
/// getArithExtendImm - Encode the extend type and shift amount for an
/// arithmetic instruction:
/// imm: 3-bit extend amount
/// shifter: 000 ==> uxtb
/// 001 ==> uxth
/// 010 ==> uxtw
/// 011 ==> uxtx
/// 100 ==> sxtb
/// 101 ==> sxth
/// 110 ==> sxtw
/// 111 ==> sxtx
/// {5-3} = shifter
/// {2-0} = imm3
static inline unsigned getArithExtendImm(ARM64_AM::ExtendType ET,
unsigned Imm) {
assert((Imm & 0x7) == Imm && "Illegal shifted immediate value!");
return (unsigned(ET) << 3) | (Imm & 0x7);
}
/// getMemDoShift - Extract the "do shift" flag value for load/store
/// instructions.
static inline bool getMemDoShift(unsigned Imm) {
return (Imm & 0x1) != 0;
}
/// getMemExtendType - Extract the extend type for the offset operand of
/// loads/stores.
static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) {
return ARM64_AM::ExtendType((Imm >> 1) & 0x7);
}
/// getMemExtendImm - Encode the extend type and the "do shift" flag for a
/// load/store instruction's offset operand:
/// doshift: whether the offset is scaled by the access size
/// shifter: 000 ==> uxtb
/// 001 ==> uxth
/// 010 ==> uxtw
/// 011 ==> uxtx
/// 100 ==> sxtb
/// 101 ==> sxth
/// 110 ==> sxtw
/// 111 ==> sxtx
/// {3-1} = shifter
/// {0} = doshift
static inline unsigned getMemExtendImm(ARM64_AM::ExtendType ET, bool Imm) {
assert((Imm & 0x7) == Imm && "Illegal shifted immediate value!");
return (unsigned(ET) << 1) | (Imm & 0x7);
}
//===----------------------------------------------------------------------===//
// Prefetch
//
/// Pre-fetch operator names.
/// The enum values match the encoding values:
/// prfop<4:3> 00=preload data, 10=prepare for store
/// prfop<2:1> 00=target L1 cache, 01=target L2 cache, 10=target L3 cache,
/// prfop<0> 0=non-streaming (temporal), 1=streaming (non-temporal)
enum PrefetchOp {
InvalidPrefetchOp = -1,
PLDL1KEEP = 0x00,
PLDL1STRM = 0x01,
PLDL2KEEP = 0x02,
PLDL2STRM = 0x03,
PLDL3KEEP = 0x04,
PLDL3STRM = 0x05,
PSTL1KEEP = 0x10,
PSTL1STRM = 0x11,
PSTL2KEEP = 0x12,
PSTL2STRM = 0x13,
PSTL3KEEP = 0x14,
PSTL3STRM = 0x15
};
/// isNamedPrefetchOp - Check if the prefetch-op 5-bit value has a name.
static inline bool isNamedPrefetchOp(unsigned prfop) {
switch (prfop) {
default: return false;
case ARM64_AM::PLDL1KEEP: case ARM64_AM::PLDL1STRM: case ARM64_AM::PLDL2KEEP:
case ARM64_AM::PLDL2STRM: case ARM64_AM::PLDL3KEEP: case ARM64_AM::PLDL3STRM:
case ARM64_AM::PSTL1KEEP: case ARM64_AM::PSTL1STRM: case ARM64_AM::PSTL2KEEP:
case ARM64_AM::PSTL2STRM: case ARM64_AM::PSTL3KEEP: case ARM64_AM::PSTL3STRM:
return true;
}
}
/// getPrefetchOpName - Get the string encoding for the prefetch operator.
static inline const char *getPrefetchOpName(ARM64_AM::PrefetchOp prfop) {
switch (prfop) {
default: assert(false && "unhandled prefetch-op type!");
case ARM64_AM::PLDL1KEEP: return "pldl1keep";
case ARM64_AM::PLDL1STRM: return "pldl1strm";
case ARM64_AM::PLDL2KEEP: return "pldl2keep";
case ARM64_AM::PLDL2STRM: return "pldl2strm";
case ARM64_AM::PLDL3KEEP: return "pldl3keep";
case ARM64_AM::PLDL3STRM: return "pldl3strm";
case ARM64_AM::PSTL1KEEP: return "pstl1keep";
case ARM64_AM::PSTL1STRM: return "pstl1strm";
case ARM64_AM::PSTL2KEEP: return "pstl2keep";
case ARM64_AM::PSTL2STRM: return "pstl2strm";
case ARM64_AM::PSTL3KEEP: return "pstl3keep";
case ARM64_AM::PSTL3STRM: return "pstl3strm";
}
return 0;
}
static inline uint64_t ror(uint64_t elt, unsigned size) {
return ((elt & 1) << (size-1)) | (elt >> 1);
}
/// processLogicalImmediate - Determine if an immediate value can be encoded
/// as the immediate operand of a logical instruction for the given register
/// size. If so, return true with "encoding" set to the encoded value in
/// the form N:immr:imms.
static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize,
uint64_t &encoding) {
if (imm == 0ULL || imm == ~0ULL ||
(regSize != 64 && (imm >> regSize != 0 || imm == ~0U)))
return false;
unsigned size = 2;
uint64_t eltVal = imm;
// First, determine the element size.
while (size < regSize) {
unsigned numElts = regSize / size;
unsigned mask = (1ULL << size) - 1;
uint64_t lowestEltVal = imm & mask;
bool allMatched = true;
for (unsigned i = 1; i < numElts; ++i) {
uint64_t currEltVal = (imm >> (i*size)) & mask;
if (currEltVal != lowestEltVal) {
allMatched = false;
break;
}
}
if (allMatched) {
eltVal = lowestEltVal;
break;
}
size *= 2;
}
// Second, determine the rotation to make the element be: 0^m 1^n.
for (unsigned i = 0; i < size; ++i) {
eltVal = ror(eltVal, size);
uint32_t clz = countLeadingZeros(eltVal) - (64 - size);
uint32_t cto = CountTrailingOnes_64(eltVal);
if (clz + cto == size) {
// Encode in immr the number of RORs it would take to get *from* this
// element value to our target value, where i+1 is the number of RORs
// to go the opposite direction.
unsigned immr = size - (i + 1);
// If size has a 1 in the n'th bit, create a value that has zeroes in
// bits [0, n] and ones above that.
uint64_t nimms = ~(size-1) << 1;
// Or the CTO value into the low bits, which must be below the Nth bit
// mentioned above.
nimms |= (cto-1);
// Extract the seventh bit and toggle it to create the N field.
unsigned N = ((nimms >> 6) & 1) ^ 1;
encoding = (N << 12) | (immr << 6) | (nimms & 0x3f);
return true;
}
}
return false;
}
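// Worked example (editorial): the 64-bit value 0x00FF00FF00FF00FF repeats the
// 16-bit element 0x00FF, which is already of the form 0^8 1^8, so no rotation
// is needed (immr = 0). With 8 trailing ones, imms becomes 0b100111 (the
// element-size marker "10" followed by 8 - 1 = 7) and N = 0, so the encoding
// N:immr:imms is 0:000000:100111.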
/// isLogicalImmediate - Return true if the immediate is valid for a logical
/// immediate instruction of the given register size. Return false otherwise.
static inline bool isLogicalImmediate(uint64_t imm, unsigned regSize) {
uint64_t encoding;
return processLogicalImmediate(imm, regSize, encoding);
}
/// encodeLogicalImmediate - Return the encoded immediate value for a logical
/// immediate instruction of the given register size.
static inline uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize) {
uint64_t encoding = 0;
bool res = processLogicalImmediate(imm, regSize, encoding);
assert(res && "invalid logical immediate");
(void)res;
return encoding;
}
/// decodeLogicalImmediate - Decode a logical immediate value in the form
/// "N:immr:imms" (where the immr and imms fields are each 6 bits) into the
/// integer value it represents with regSize bits.
static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) {
// Extract the N, imms, and immr fields.
unsigned N = (val >> 12) & 1;
unsigned immr = (val >> 6) & 0x3f;
unsigned imms = val & 0x3f;
assert((regSize == 64 || N == 0) && "undefined logical immediate encoding");
int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
assert(len >= 0 && "undefined logical immediate encoding");
unsigned size = (1 << len);
unsigned R = immr & (size - 1);
unsigned S = imms & (size - 1);
assert(S != size - 1 && "undefined logical immediate encoding");
uint64_t pattern = (1ULL << (S + 1)) - 1;
for (unsigned i = 0; i < R; ++i)
pattern = ror(pattern, size);
// Replicate the pattern to fill the regSize.
while (size != regSize) {
pattern |= (pattern << size);
size *= 2;
}
return pattern;
}
/// isValidDecodeLogicalImmediate - Check to see if the logical immediate value
/// in the form "N:immr:imms" (where the immr and imms fields are each 6 bits)
/// is a valid encoding for an integer value with regSize bits.
static inline bool isValidDecodeLogicalImmediate(uint64_t val,
unsigned regSize) {
// Extract the N and imms fields needed for checking.
unsigned N = (val >> 12) & 1;
unsigned imms = val & 0x3f;
if (regSize == 32 && N != 0) // undefined logical immediate encoding
return false;
int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
if (len < 0) // undefined logical immediate encoding
return false;
unsigned size = (1 << len);
unsigned S = imms & (size - 1);
if (S == size - 1) // undefined logical immediate encoding
return false;
return true;
}
//===----------------------------------------------------------------------===//
// Floating-point Immediates
//
static inline float getFPImmFloat(unsigned Imm) {
// We expect an 8-bit binary encoding of a floating-point number here.
union {
uint32_t I;
float F;
} FPUnion;
uint8_t Sign = (Imm >> 7) & 0x1;
uint8_t Exp = (Imm >> 4) & 0x7;
uint8_t Mantissa = Imm & 0xf;
// 8-bit FP IEEE Float Encoding
// abcd efgh aBbbbbbc defgh000 00000000 00000000
//
// where B = NOT(b);
FPUnion.I = 0;
FPUnion.I |= Sign << 31;
FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
FPUnion.I |= (Exp & 0x3) << 23;
FPUnion.I |= Mantissa << 19;
return FPUnion.F;
}
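// For example (illustrative): Imm = 0x70 has Sign = 0, Exp = 0b111 and
// Mantissa = 0, which expands to 0x3F800000, i.e. 1.0f. The representable
// values are +/-(16..31)/16 * 2^n for n in [-3, 4].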
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
static inline int getFP32Imm(const APInt &Imm) {
uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
// We can handle 4 bits of mantissa.
// mantissa = (16+UInt(e:f:g:h))/16.
if (Mantissa & 0x7ffff)
return -1;
Mantissa >>= 19;
if ((Mantissa & 0xf) != Mantissa)
return -1;
// We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
if (Exp < -3 || Exp > 4)
return -1;
Exp = ((Exp+3) & 0x7) ^ 4;
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
static inline int getFP32Imm(const APFloat &FPImm) {
return getFP32Imm(FPImm.bitcastToAPInt());
}
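// Usage sketch (illustrative): getFP32Imm(APFloat(1.0f)) returns 0x70, the
// inverse of the expansion above, while a value such as 0.1f (whose mantissa
// does not fit in 4 bits) returns -1 to signal "not encodable".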
/// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
static inline int getFP64Imm(const APInt &Imm) {
uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
// We can handle 4 bits of mantissa.
// mantissa = (16+UInt(e:f:g:h))/16.
if (Mantissa & 0xffffffffffffULL)
return -1;
Mantissa >>= 48;
if ((Mantissa & 0xf) != Mantissa)
return -1;
// We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
if (Exp < -3 || Exp > 4)
return -1;
Exp = ((Exp+3) & 0x7) ^ 4;
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
static inline int getFP64Imm(const APFloat &FPImm) {
return getFP64Imm(FPImm.bitcastToAPInt());
}
//===--------------------------------------------------------------------===//
// AdvSIMD Modified Immediates
//===--------------------------------------------------------------------===//
// 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh
static inline bool isAdvSIMDModImmType1(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
((Imm & 0xffffff00ffffff00ULL) == 0);
}
static inline uint8_t encodeAdvSIMDModImmType1(uint64_t Imm) {
return (Imm & 0xffULL);
}
static inline uint64_t decodeAdvSIMDModImmType1(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 32) | EncVal;
}
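// For example (illustrative): Imm = 0x000000AB000000AB is a Type1 immediate;
// encodeAdvSIMDModImmType1 returns 0xAB, and decodeAdvSIMDModImmType1(0xAB)
// reproduces the original 64-bit value.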
// 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00
static inline bool isAdvSIMDModImmType2(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
((Imm & 0xffff00ffffff00ffULL) == 0);
}
static inline uint8_t encodeAdvSIMDModImmType2(uint64_t Imm) {
return (Imm & 0xff00ULL) >> 8;
}
static inline uint64_t decodeAdvSIMDModImmType2(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 40) | (EncVal << 8);
}
// 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00
static inline bool isAdvSIMDModImmType3(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
((Imm & 0xff00ffffff00ffffULL) == 0);
}
static inline uint8_t encodeAdvSIMDModImmType3(uint64_t Imm) {
return (Imm & 0xff0000ULL) >> 16;
}
static inline uint64_t decodeAdvSIMDModImmType3(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 48) | (EncVal << 16);
}
// abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00
static inline bool isAdvSIMDModImmType4(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
((Imm & 0x00ffffff00ffffffULL) == 0);
}
static inline uint8_t encodeAdvSIMDModImmType4(uint64_t Imm) {
return (Imm & 0xff000000ULL) >> 24;
}
static inline uint64_t decodeAdvSIMDModImmType4(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 56) | (EncVal << 24);
}
// 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh
static inline bool isAdvSIMDModImmType5(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
(((Imm & 0x00ff0000ULL) >> 16) == (Imm & 0x000000ffULL)) &&
((Imm & 0xff00ff00ff00ff00ULL) == 0);
}
static inline uint8_t encodeAdvSIMDModImmType5(uint64_t Imm) {
return (Imm & 0xffULL);
}
static inline uint64_t decodeAdvSIMDModImmType5(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 48) | (EncVal << 32) | (EncVal << 16) | EncVal;
}
// abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00
static inline bool isAdvSIMDModImmType6(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
(((Imm & 0xff000000ULL) >> 16) == (Imm & 0x0000ff00ULL)) &&
((Imm & 0x00ff00ff00ff00ffULL) == 0);
}
static inline uint8_t encodeAdvSIMDModImmType6(uint64_t Imm) {
return (Imm & 0xff00ULL) >> 8;
}
static inline uint64_t decodeAdvSIMDModImmType6(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 56) | (EncVal << 40) | (EncVal << 24) | (EncVal << 8);
}
// 0x00 0x00 abcdefgh 0xFF 0x00 0x00 abcdefgh 0xFF
static inline bool isAdvSIMDModImmType7(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
((Imm & 0xffff00ffffff00ffULL) == 0x000000ff000000ffULL);
}
static inline uint8_t encodeAdvSIMDModImmType7(uint64_t Imm) {
return (Imm & 0xff00ULL) >> 8;
}
static inline uint64_t decodeAdvSIMDModImmType7(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 40) | (EncVal << 8) | 0x000000ff000000ffULL;
}
// 0x00 abcdefgh 0xFF 0xFF 0x00 abcdefgh 0xFF 0xFF
static inline bool isAdvSIMDModImmType8(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
((Imm & 0xff00ffffff00ffffULL) == 0x0000ffff0000ffffULL);
}
static inline uint64_t decodeAdvSIMDModImmType8(uint8_t Imm) {
uint64_t EncVal = Imm;
return (EncVal << 48) | (EncVal << 16) | 0x0000ffff0000ffffULL;
}
static inline uint8_t encodeAdvSIMDModImmType8(uint64_t Imm) {
return (Imm & 0x00ff0000ULL) >> 16;
}
// abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh
static inline bool isAdvSIMDModImmType9(uint64_t Imm) {
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
((Imm >> 48) == (Imm & 0x0000ffffULL)) &&
((Imm >> 56) == (Imm & 0x000000ffULL));
}
static inline uint8_t encodeAdvSIMDModImmType9(uint64_t Imm) {
return (Imm & 0xffULL);
}
static inline uint64_t decodeAdvSIMDModImmType9(uint8_t Imm) {
uint64_t EncVal = Imm;
EncVal |= (EncVal << 8);
EncVal |= (EncVal << 16);
EncVal |= (EncVal << 32);
return EncVal;
}
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
// cmode: 1110, op: 1
static inline bool isAdvSIMDModImmType10(uint64_t Imm) {
uint64_t ByteA = Imm & 0xff00000000000000ULL;
uint64_t ByteB = Imm & 0x00ff000000000000ULL;
uint64_t ByteC = Imm & 0x0000ff0000000000ULL;
uint64_t ByteD = Imm & 0x000000ff00000000ULL;
uint64_t ByteE = Imm & 0x00000000ff000000ULL;
uint64_t ByteF = Imm & 0x0000000000ff0000ULL;
uint64_t ByteG = Imm & 0x000000000000ff00ULL;
uint64_t ByteH = Imm & 0x00000000000000ffULL;
return (ByteA == 0ULL || ByteA == 0xff00000000000000ULL) &&
(ByteB == 0ULL || ByteB == 0x00ff000000000000ULL) &&
(ByteC == 0ULL || ByteC == 0x0000ff0000000000ULL) &&
(ByteD == 0ULL || ByteD == 0x000000ff00000000ULL) &&
(ByteE == 0ULL || ByteE == 0x00000000ff000000ULL) &&
(ByteF == 0ULL || ByteF == 0x0000000000ff0000ULL) &&
(ByteG == 0ULL || ByteG == 0x000000000000ff00ULL) &&
(ByteH == 0ULL || ByteH == 0x00000000000000ffULL);
}
static inline uint8_t encodeAdvSIMDModImmType10(uint64_t Imm) {
bool BitA = Imm & 0xff00000000000000ULL;
bool BitB = Imm & 0x00ff000000000000ULL;
bool BitC = Imm & 0x0000ff0000000000ULL;
bool BitD = Imm & 0x000000ff00000000ULL;
bool BitE = Imm & 0x00000000ff000000ULL;
bool BitF = Imm & 0x0000000000ff0000ULL;
bool BitG = Imm & 0x000000000000ff00ULL;
bool BitH = Imm & 0x00000000000000ffULL;
unsigned EncVal = BitA;
EncVal <<= 1;
EncVal |= BitB;
EncVal <<= 1;
EncVal |= BitC;
EncVal <<= 1;
EncVal |= BitD;
EncVal <<= 1;
EncVal |= BitE;
EncVal <<= 1;
EncVal |= BitF;
EncVal <<= 1;
EncVal |= BitG;
EncVal <<= 1;
EncVal |= BitH;
return EncVal;
}
static inline uint64_t decodeAdvSIMDModImmType10(uint8_t Imm) {
uint64_t EncVal = 0;
if (Imm & 0x80) EncVal |= 0xff00000000000000ULL;
if (Imm & 0x40) EncVal |= 0x00ff000000000000ULL;
if (Imm & 0x20) EncVal |= 0x0000ff0000000000ULL;
if (Imm & 0x10) EncVal |= 0x000000ff00000000ULL;
if (Imm & 0x08) EncVal |= 0x00000000ff000000ULL;
if (Imm & 0x04) EncVal |= 0x0000000000ff0000ULL;
if (Imm & 0x02) EncVal |= 0x000000000000ff00ULL;
if (Imm & 0x01) EncVal |= 0x00000000000000ffULL;
return EncVal;
}
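// For example (illustrative): Imm = 0xFF00FF0000FF00FF is a Type10 immediate
// (every byte is 0x00 or 0xFF); encodeAdvSIMDModImmType10 packs one bit per
// byte, MSB first, giving 0b10100101 = 0xA5, and decoding 0xA5 round-trips.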
// aBbbbbbc defgh000 0x00 0x00 aBbbbbbc defgh000 0x00 0x00
static inline bool isAdvSIMDModImmType11(uint64_t Imm) {
uint64_t BString = (Imm & 0x7E000000ULL) >> 25;
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
(BString == 0x1f || BString == 0x20) &&
((Imm & 0x0007ffff0007ffffULL) == 0);
}
static inline uint8_t encodeAdvSIMDModImmType11(uint64_t Imm) {
bool BitA = (Imm & 0x80000000ULL);
bool BitB = (Imm & 0x20000000ULL);
bool BitC = (Imm & 0x01000000ULL);
bool BitD = (Imm & 0x00800000ULL);
bool BitE = (Imm & 0x00400000ULL);
bool BitF = (Imm & 0x00200000ULL);
bool BitG = (Imm & 0x00100000ULL);
bool BitH = (Imm & 0x00080000ULL);
unsigned EncVal = BitA;
EncVal <<= 1;
EncVal |= BitB;
EncVal <<= 1;
EncVal |= BitC;
EncVal <<= 1;
EncVal |= BitD;
EncVal <<= 1;
EncVal |= BitE;
EncVal <<= 1;
EncVal |= BitF;
EncVal <<= 1;
EncVal |= BitG;
EncVal <<= 1;
EncVal |= BitH;
return EncVal;
}
static inline uint64_t decodeAdvSIMDModImmType11(uint8_t Imm) {
uint64_t EncVal = 0;
if (Imm & 0x80) EncVal |= 0x80000000ULL;
if (Imm & 0x40) EncVal |= 0x3e000000ULL;
else EncVal |= 0x40000000ULL;
if (Imm & 0x20) EncVal |= 0x01000000ULL;
if (Imm & 0x10) EncVal |= 0x00800000ULL;
if (Imm & 0x08) EncVal |= 0x00400000ULL;
if (Imm & 0x04) EncVal |= 0x00200000ULL;
if (Imm & 0x02) EncVal |= 0x00100000ULL;
if (Imm & 0x01) EncVal |= 0x00080000ULL;
return (EncVal << 32) | EncVal;
}
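// For example (illustrative): decodeAdvSIMDModImmType11(0x70) expands to
// 0x3F8000003F800000, i.e. 1.0f replicated into both 32-bit lanes, matching
// the 8-bit FP immediate expansion of 0x70 above.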
// aBbbbbbb bbcdefgh 0x00 0x00 0x00 0x00 0x00 0x00
static inline bool isAdvSIMDModImmType12(uint64_t Imm) {
uint64_t BString = (Imm & 0x7fc0000000000000ULL) >> 54;
return ((BString == 0xff || BString == 0x100) &&
((Imm & 0x0000ffffffffffffULL) == 0));
}
static inline uint8_t encodeAdvSIMDModImmType12(uint64_t Imm) {
bool BitA = (Imm & 0x8000000000000000ULL);
bool BitB = (Imm & 0x0040000000000000ULL);
bool BitC = (Imm & 0x0020000000000000ULL);
bool BitD = (Imm & 0x0010000000000000ULL);
bool BitE = (Imm & 0x0008000000000000ULL);
bool BitF = (Imm & 0x0004000000000000ULL);
bool BitG = (Imm & 0x0002000000000000ULL);
bool BitH = (Imm & 0x0001000000000000ULL);
unsigned EncVal = BitA;
EncVal <<= 1;
EncVal |= BitB;
EncVal <<= 1;
EncVal |= BitC;
EncVal <<= 1;
EncVal |= BitD;
EncVal <<= 1;
EncVal |= BitE;
EncVal <<= 1;
EncVal |= BitF;
EncVal <<= 1;
EncVal |= BitG;
EncVal <<= 1;
EncVal |= BitH;
return EncVal;
}
static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) {
uint64_t EncVal = 0;
if (Imm & 0x80) EncVal |= 0x8000000000000000ULL;
if (Imm & 0x40) EncVal |= 0x3fc0000000000000ULL;
else EncVal |= 0x4000000000000000ULL;
if (Imm & 0x20) EncVal |= 0x0020000000000000ULL;
if (Imm & 0x10) EncVal |= 0x0010000000000000ULL;
if (Imm & 0x08) EncVal |= 0x0008000000000000ULL;
if (Imm & 0x04) EncVal |= 0x0004000000000000ULL;
if (Imm & 0x02) EncVal |= 0x0002000000000000ULL;
if (Imm & 0x01) EncVal |= 0x0001000000000000ULL;
return (EncVal << 32) | EncVal;
}
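// For example (illustrative): decodeAdvSIMDModImmType12(0x70) expands to
// 0x3FF0000000000000, i.e. 1.0 as a 64-bit double (the low 32 bits of EncVal
// are zero, so the final shift-and-OR leaves the value unchanged).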
} // end namespace ARM64_AM
} // end namespace llvm
#endif

View File

@ -0,0 +1,533 @@
//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "ARM64.h"
#include "ARM64RegisterInfo.h"
#include "MCTargetDesc/ARM64FixupKinds.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
namespace {
class ARM64AsmBackend : public MCAsmBackend {
static const unsigned PCRelFlagVal =
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel;
public:
ARM64AsmBackend(const Target &T) : MCAsmBackend() {}
unsigned getNumFixupKinds() const { return ARM64::NumTargetFixupKinds; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
// ARM64FixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
{ "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal },
{ "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal },
{ "fixup_arm64_add_imm12", 10, 12, 0 },
{ "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 },
{ "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 },
{ "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 },
{ "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 },
{ "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 },
{ "fixup_arm64_movw", 5, 16, 0 },
{ "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal },
{ "fixup_arm64_pcrel_imm19", 5, 19, PCRelFlagVal },
{ "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal },
{ "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal },
{ "fixup_arm64_tlsdesc_call", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const;
bool mayNeedRelaxation(const MCInst &Inst) const;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
unsigned getPointerSize() const { return 8; }
};
} // end anonymous namespace
/// \brief The number of bytes the fixup may change.
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
assert(0 && "Unknown fixup kind!");
case ARM64::fixup_arm64_tlsdesc_call:
return 0;
case FK_Data_1:
return 1;
case FK_Data_2:
case ARM64::fixup_arm64_movw:
return 2;
case ARM64::fixup_arm64_pcrel_branch14:
case ARM64::fixup_arm64_add_imm12:
case ARM64::fixup_arm64_ldst_imm12_scale1:
case ARM64::fixup_arm64_ldst_imm12_scale2:
case ARM64::fixup_arm64_ldst_imm12_scale4:
case ARM64::fixup_arm64_ldst_imm12_scale8:
case ARM64::fixup_arm64_ldst_imm12_scale16:
case ARM64::fixup_arm64_pcrel_imm19:
return 3;
case ARM64::fixup_arm64_pcrel_adr_imm21:
case ARM64::fixup_arm64_pcrel_adrp_imm21:
case ARM64::fixup_arm64_pcrel_branch26:
case ARM64::fixup_arm64_pcrel_call26:
case FK_Data_4:
return 4;
case FK_Data_8:
return 8;
}
}
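// AdrImmBits scatters the 21-bit ADR/ADRP immediate into the instruction's
// fields: the low two bits land in immlo (bits 30:29) and the remaining 19
// bits in immhi (bits 23:5).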
static unsigned AdrImmBits(unsigned Value) {
unsigned lo2 = Value & 0x3;
unsigned hi19 = (Value & 0x1ffffc) >> 2;
return (hi19 << 5) | (lo2 << 29);
}
static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
default:
assert(false && "Unknown fixup kind!");
case ARM64::fixup_arm64_pcrel_adr_imm21:
if (SignedValue > 2097151 || SignedValue < -2097152)
report_fatal_error("fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
case ARM64::fixup_arm64_pcrel_adrp_imm21:
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
case ARM64::fixup_arm64_pcrel_imm19:
// Signed 21-bit immediate
if (SignedValue > 2097151 || SignedValue < -2097152)
report_fatal_error("fixup value out of range");
// Low two bits are not encoded.
return (Value >> 2) & 0x7ffff;
case ARM64::fixup_arm64_add_imm12:
case ARM64::fixup_arm64_ldst_imm12_scale1:
// Unsigned 12-bit immediate
if (Value >= 0x1000)
report_fatal_error("invalid imm12 fixup value");
return Value;
case ARM64::fixup_arm64_ldst_imm12_scale2:
// Unsigned 12-bit immediate which gets multiplied by 2
if (Value & 1 || Value >= 0x2000)
report_fatal_error("invalid imm12 fixup value");
return Value >> 1;
case ARM64::fixup_arm64_ldst_imm12_scale4:
// Unsigned 12-bit immediate which gets multiplied by 4
if (Value & 3 || Value >= 0x4000)
report_fatal_error("invalid imm12 fixup value");
return Value >> 2;
case ARM64::fixup_arm64_ldst_imm12_scale8:
// Unsigned 12-bit immediate which gets multiplied by 8
if (Value & 7 || Value >= 0x8000)
report_fatal_error("invalid imm12 fixup value");
return Value >> 3;
case ARM64::fixup_arm64_ldst_imm12_scale16:
// Unsigned 12-bit immediate which gets multiplied by 16
if (Value & 15 || Value >= 0x10000)
report_fatal_error("invalid imm12 fixup value");
return Value >> 4;
case ARM64::fixup_arm64_movw:
report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet");
return Value;
case ARM64::fixup_arm64_pcrel_branch14:
// Signed 16-bit immediate
if (SignedValue > 32767 || SignedValue < -32768)
report_fatal_error("fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
if (Value & 0x3)
report_fatal_error("fixup not sufficiently aligned");
return (Value >> 2) & 0x3fff;
case ARM64::fixup_arm64_pcrel_branch26:
case ARM64::fixup_arm64_pcrel_call26:
// Signed 28-bit immediate
if (SignedValue > 134217727 || SignedValue < -134217728)
report_fatal_error("fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
if (Value & 0x3)
report_fatal_error("fixup not sufficiently aligned");
return (Value >> 2) & 0x3ffffff;
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
return Value;
}
}
void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
if (!Value)
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
// Apply any target-specific value adjustments.
Value = adjustFixupValue(Fixup.getKind(), Value);
// Shift the value into position.
Value <<= Info.TargetOffset;
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i != NumBytes; ++i)
Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME: This isn't correct for ARM64. Just moving the "generic" logic
// into the targets for now.
//
// Relax if the value is too big for a (signed) i8.
return int64_t(Value) != int64_t(int8_t(Value));
}
void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented");
}
bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
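// For example (illustrative): Count == 10 emits two zero bytes followed by
// two NOP words (0xd503201f).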
if ((Count & 3) != 0) {
for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
OW->Write8(0);
}
// We are properly aligned, so write NOPs as requested.
Count /= 4;
for (uint64_t i = 0; i != Count; ++i)
OW->Write32(0xd503201f);
return true;
}
namespace {
namespace CU {
/// \brief Compact unwind encoding values.
enum CompactUnwindEncodings {
/// \brief A "frameless" leaf function, where no non-volatile registers are
/// saved. The return address remains in LR throughout the function.
UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
/// \brief No compact unwind encoding is available. Instead the low 23 bits of
/// the compact unwind encoding are the offset of the DWARF FDE in the
/// __eh_frame section. This mode is never used in object files. It is only
/// generated by the linker in final linked images, which have only DWARF info
/// for a function.
UNWIND_ARM64_MODE_DWARF = 0x03000000,
/// \brief This is a standard arm64 prologue where FP/LR are immediately
/// pushed on the stack, then SP is copied to FP. If there are any
/// non-volatile registers saved, they are copied into the stack frame in pairs
/// in a contiguous range right below the saved FP/LR pair. Any subset of the
/// five X pairs and four D pairs can be saved, but the memory layout must be
/// in register number order.
UNWIND_ARM64_MODE_FRAME = 0x04000000,
/// \brief Frame register pair encodings.
UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800
};
} // end CU namespace
// FIXME: This should be in a separate file.
class DarwinARM64AsmBackend : public ARM64AsmBackend {
const MCRegisterInfo &MRI;
/// \brief Encode compact unwind stack adjustment for frameless functions.
/// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
/// The stack size always needs to be 16 byte aligned.
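/// For example (illustrative): a 64-byte frameless stack yields 0x4000, i.e.
/// 64/16 shifted into the 12-bit stack-size field at bits [23:12]; the largest
/// representable adjustment is 65520 bytes (0xFFF * 16).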
uint32_t encodeStackAdjustment(uint32_t StackSize) const {
return (StackSize / 16) << 12;
}
public:
DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
: ARM64AsmBackend(T), MRI(MRI) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
MachO::CPU_SUBTYPE_ARM64_ALL);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
// Any section for which the linker breaks things into atoms needs to
// preserve symbols, including assembler local symbols, to identify
// those atoms. These sections are:
// Sections of type:
//
// S_CSTRING_LITERALS (e.g. __cstring)
// S_LITERAL_POINTERS (e.g. objc selector pointers)
// S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
//
// Sections named:
//
// __TEXT,__eh_frame
// __TEXT,__ustring
// __DATA,__cfstring
// __DATA,__objc_classrefs
// __DATA,__objc_catlist
//
// FIXME: It would be better if the compiler used actual linker local
// symbols for each of these sections rather than preserving what
// are ostensibly assembler local symbols.
const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
SMO.getType() == MachO::S_4BYTE_LITERALS ||
SMO.getType() == MachO::S_8BYTE_LITERALS ||
SMO.getType() == MachO::S_16BYTE_LITERALS ||
SMO.getType() == MachO::S_LITERAL_POINTERS ||
(SMO.getSegmentName() == "__TEXT" &&
(SMO.getSectionName() == "__eh_frame" ||
SMO.getSectionName() == "__ustring")) ||
(SMO.getSegmentName() == "__DATA" &&
(SMO.getSectionName() == "__cfstring" ||
SMO.getSectionName() == "__objc_classrefs" ||
SMO.getSectionName() == "__objc_catlist")));
}
/// \brief Generate the compact unwind encoding from the CFI directives.
virtual uint32_t
generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const
override {
if (Instrs.empty())
return CU::UNWIND_ARM64_MODE_FRAMELESS;
bool HasFP = false;
unsigned StackSize = 0;
uint32_t CompactUnwindEncoding = 0;
for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
const MCCFIInstruction &Inst = Instrs[i];
switch (Inst.getOperation()) {
default:
// Cannot handle this directive: bail out.
return CU::UNWIND_ARM64_MODE_DWARF;
case MCCFIInstruction::OpDefCfa: {
// Defines a frame pointer.
assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) ==
ARM64::FP &&
"Invalid frame pointer!");
assert(i + 2 < e && "Insufficient CFI instructions to define a frame!");
const MCCFIInstruction &LRPush = Instrs[++i];
assert(LRPush.getOperation() == MCCFIInstruction::OpOffset &&
"Link register not pushed!");
const MCCFIInstruction &FPPush = Instrs[++i];
assert(FPPush.getOperation() == MCCFIInstruction::OpOffset &&
"Frame pointer not pushed!");
unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true);
unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true);
LRReg = getXRegFromWReg(LRReg);
FPReg = getXRegFromWReg(FPReg);
assert(LRReg == ARM64::LR && FPReg == ARM64::FP &&
"Pushing invalid registers for frame!");
// Indicate that the function has a frame.
CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME;
HasFP = true;
break;
}
case MCCFIInstruction::OpDefCfaOffset: {
assert(StackSize == 0 && "We already have the CFA offset!");
StackSize = std::abs(Inst.getOffset());
break;
}
case MCCFIInstruction::OpOffset: {
// Registers are saved in pairs. We expect there to be two consecutive
// `.cfi_offset' instructions with the appropriate registers specified.
unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true);
if (i + 1 == e)
return CU::UNWIND_ARM64_MODE_DWARF;
const MCCFIInstruction &Inst2 = Instrs[++i];
if (Inst2.getOperation() != MCCFIInstruction::OpOffset)
return CU::UNWIND_ARM64_MODE_DWARF;
unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true);
// N.B. The encodings must be in register number order, and the X
// registers before the D registers.
// X19/X20 pair = 0x00000001,
// X21/X22 pair = 0x00000002,
// X23/X24 pair = 0x00000004,
// X25/X26 pair = 0x00000008,
// X27/X28 pair = 0x00000010
Reg1 = getXRegFromWReg(Reg1);
Reg2 = getXRegFromWReg(Reg2);
if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 &&
(CompactUnwindEncoding & 0xF1E) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR;
else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 &&
(CompactUnwindEncoding & 0xF1C) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR;
else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 &&
(CompactUnwindEncoding & 0xF18) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR;
else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 &&
(CompactUnwindEncoding & 0xF10) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR;
else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 &&
(CompactUnwindEncoding & 0xF00) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR;
else {
Reg1 = getDRegFromBReg(Reg1);
Reg2 = getDRegFromBReg(Reg2);
// D8/D9 pair = 0x00000100,
// D10/D11 pair = 0x00000200,
// D12/D13 pair = 0x00000400,
// D14/D15 pair = 0x00000800
if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 &&
(CompactUnwindEncoding & 0xE00) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR;
else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 &&
(CompactUnwindEncoding & 0xC00) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR;
else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 &&
(CompactUnwindEncoding & 0x800) == 0)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR;
else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15)
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR;
else
// A pair was pushed which we cannot handle.
return CU::UNWIND_ARM64_MODE_DWARF;
}
break;
}
}
}
if (!HasFP) {
// With compact unwind info we can only represent stack adjustments of up
// to 65520 bytes.
if (StackSize > 65520)
return CU::UNWIND_ARM64_MODE_DWARF;
CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS;
CompactUnwindEncoding |= encodeStackAdjustment(StackSize);
}
return CompactUnwindEncoding;
}
};
} // end anonymous namespace
namespace {
class ELFARM64AsmBackend : public ARM64AsmBackend {
public:
uint8_t OSABI;
ELFARM64AsmBackend(const Target &T, uint8_t OSABI)
: ARM64AsmBackend(T), OSABI(OSABI) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARM64ELFObjectWriter(OS, OSABI);
}
void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
const MCValue &Target, uint64_t &Value,
bool &IsResolved) override;
};
void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFixup &Fixup,
const MCFragment *DF,
const MCValue &Target,
uint64_t &Value, bool &IsResolved) {
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
// ~0xfff. This means that the required offset to reach a symbol can vary by
// up to one step depending on where the ADRP is in memory. For example:
//
// ADRP x0, there
// there:
//
// If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
// we'll need that as an offset. At any other address "there" will be in the
// same page as the ADRP and the instruction should encode 0x0. Assuming the
// section isn't 0x1000-aligned, we therefore need to delegate this decision
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21)
IsResolved = false;
}
}
MCAsmBackend *llvm::createARM64AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
return new DarwinARM64AsmBackend(T, MRI);
assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
return new ELFARM64AsmBackend(T, TheTriple.getOS());
}

View File

@ -0,0 +1,998 @@
//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains small standalone helper functions and enum definitions for
// the ARM64 target useful for the compiler back-end and the MC libraries.
// As such, it deliberately does not include references to LLVM core
// code gen types, passes, etc.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64BASEINFO_H
#define ARM64BASEINFO_H
#include "ARM64MCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
inline static unsigned getWRegFromXReg(unsigned Reg) {
switch (Reg) {
case ARM64::X0: return ARM64::W0;
case ARM64::X1: return ARM64::W1;
case ARM64::X2: return ARM64::W2;
case ARM64::X3: return ARM64::W3;
case ARM64::X4: return ARM64::W4;
case ARM64::X5: return ARM64::W5;
case ARM64::X6: return ARM64::W6;
case ARM64::X7: return ARM64::W7;
case ARM64::X8: return ARM64::W8;
case ARM64::X9: return ARM64::W9;
case ARM64::X10: return ARM64::W10;
case ARM64::X11: return ARM64::W11;
case ARM64::X12: return ARM64::W12;
case ARM64::X13: return ARM64::W13;
case ARM64::X14: return ARM64::W14;
case ARM64::X15: return ARM64::W15;
case ARM64::X16: return ARM64::W16;
case ARM64::X17: return ARM64::W17;
case ARM64::X18: return ARM64::W18;
case ARM64::X19: return ARM64::W19;
case ARM64::X20: return ARM64::W20;
case ARM64::X21: return ARM64::W21;
case ARM64::X22: return ARM64::W22;
case ARM64::X23: return ARM64::W23;
case ARM64::X24: return ARM64::W24;
case ARM64::X25: return ARM64::W25;
case ARM64::X26: return ARM64::W26;
case ARM64::X27: return ARM64::W27;
case ARM64::X28: return ARM64::W28;
case ARM64::FP: return ARM64::W29;
case ARM64::LR: return ARM64::W30;
case ARM64::SP: return ARM64::WSP;
case ARM64::XZR: return ARM64::WZR;
}
// For anything else, return it unchanged.
return Reg;
}
inline static unsigned getXRegFromWReg(unsigned Reg) {
switch (Reg) {
case ARM64::W0: return ARM64::X0;
case ARM64::W1: return ARM64::X1;
case ARM64::W2: return ARM64::X2;
case ARM64::W3: return ARM64::X3;
case ARM64::W4: return ARM64::X4;
case ARM64::W5: return ARM64::X5;
case ARM64::W6: return ARM64::X6;
case ARM64::W7: return ARM64::X7;
case ARM64::W8: return ARM64::X8;
case ARM64::W9: return ARM64::X9;
case ARM64::W10: return ARM64::X10;
case ARM64::W11: return ARM64::X11;
case ARM64::W12: return ARM64::X12;
case ARM64::W13: return ARM64::X13;
case ARM64::W14: return ARM64::X14;
case ARM64::W15: return ARM64::X15;
case ARM64::W16: return ARM64::X16;
case ARM64::W17: return ARM64::X17;
case ARM64::W18: return ARM64::X18;
case ARM64::W19: return ARM64::X19;
case ARM64::W20: return ARM64::X20;
case ARM64::W21: return ARM64::X21;
case ARM64::W22: return ARM64::X22;
case ARM64::W23: return ARM64::X23;
case ARM64::W24: return ARM64::X24;
case ARM64::W25: return ARM64::X25;
case ARM64::W26: return ARM64::X26;
case ARM64::W27: return ARM64::X27;
case ARM64::W28: return ARM64::X28;
case ARM64::W29: return ARM64::FP;
case ARM64::W30: return ARM64::LR;
case ARM64::WSP: return ARM64::SP;
case ARM64::WZR: return ARM64::XZR;
}
// For anything else, return it unchanged.
return Reg;
}
static inline unsigned getBRegFromDReg(unsigned Reg) {
switch (Reg) {
case ARM64::D0: return ARM64::B0;
case ARM64::D1: return ARM64::B1;
case ARM64::D2: return ARM64::B2;
case ARM64::D3: return ARM64::B3;
case ARM64::D4: return ARM64::B4;
case ARM64::D5: return ARM64::B5;
case ARM64::D6: return ARM64::B6;
case ARM64::D7: return ARM64::B7;
case ARM64::D8: return ARM64::B8;
case ARM64::D9: return ARM64::B9;
case ARM64::D10: return ARM64::B10;
case ARM64::D11: return ARM64::B11;
case ARM64::D12: return ARM64::B12;
case ARM64::D13: return ARM64::B13;
case ARM64::D14: return ARM64::B14;
case ARM64::D15: return ARM64::B15;
case ARM64::D16: return ARM64::B16;
case ARM64::D17: return ARM64::B17;
case ARM64::D18: return ARM64::B18;
case ARM64::D19: return ARM64::B19;
case ARM64::D20: return ARM64::B20;
case ARM64::D21: return ARM64::B21;
case ARM64::D22: return ARM64::B22;
case ARM64::D23: return ARM64::B23;
case ARM64::D24: return ARM64::B24;
case ARM64::D25: return ARM64::B25;
case ARM64::D26: return ARM64::B26;
case ARM64::D27: return ARM64::B27;
case ARM64::D28: return ARM64::B28;
case ARM64::D29: return ARM64::B29;
case ARM64::D30: return ARM64::B30;
case ARM64::D31: return ARM64::B31;
}
// For anything else, return it unchanged.
return Reg;
}
static inline unsigned getDRegFromBReg(unsigned Reg) {
switch (Reg) {
case ARM64::B0: return ARM64::D0;
case ARM64::B1: return ARM64::D1;
case ARM64::B2: return ARM64::D2;
case ARM64::B3: return ARM64::D3;
case ARM64::B4: return ARM64::D4;
case ARM64::B5: return ARM64::D5;
case ARM64::B6: return ARM64::D6;
case ARM64::B7: return ARM64::D7;
case ARM64::B8: return ARM64::D8;
case ARM64::B9: return ARM64::D9;
case ARM64::B10: return ARM64::D10;
case ARM64::B11: return ARM64::D11;
case ARM64::B12: return ARM64::D12;
case ARM64::B13: return ARM64::D13;
case ARM64::B14: return ARM64::D14;
case ARM64::B15: return ARM64::D15;
case ARM64::B16: return ARM64::D16;
case ARM64::B17: return ARM64::D17;
case ARM64::B18: return ARM64::D18;
case ARM64::B19: return ARM64::D19;
case ARM64::B20: return ARM64::D20;
case ARM64::B21: return ARM64::D21;
case ARM64::B22: return ARM64::D22;
case ARM64::B23: return ARM64::D23;
case ARM64::B24: return ARM64::D24;
case ARM64::B25: return ARM64::D25;
case ARM64::B26: return ARM64::D26;
case ARM64::B27: return ARM64::D27;
case ARM64::B28: return ARM64::D28;
case ARM64::B29: return ARM64::D29;
case ARM64::B30: return ARM64::D30;
case ARM64::B31: return ARM64::D31;
}
// For anything else, return it unchanged.
return Reg;
}
namespace ARM64CC {
// The CondCodes constants map directly to the 4-bit encoding of the condition
// field for predicated instructions.
enum CondCode { // Meaning (integer) Meaning (floating-point)
EQ = 0x0, // Equal Equal
NE = 0x1, // Not equal Not equal, or unordered
CS = 0x2, // Carry set >, ==, or unordered
CC = 0x3, // Carry clear Less than
MI = 0x4, // Minus, negative Less than
PL = 0x5, // Plus, positive or zero >, ==, or unordered
VS = 0x6, // Overflow Unordered
VC = 0x7, // No overflow Not unordered
HI = 0x8, // Unsigned higher Greater than, or unordered
LS = 0x9, // Unsigned lower or same Less than or equal
GE = 0xa, // Greater than or equal Greater than or equal
LT = 0xb, // Less than Less than, or unordered
GT = 0xc, // Greater than Greater than
LE = 0xd, // Less than or equal <, ==, or unordered
AL = 0xe // Always (unconditional) Always (unconditional)
};
inline static const char *getCondCodeName(CondCode Code) {
// cond<0> is ignored when cond<3:1> = 111, where 1110 is 0xe (aka AL).
if ((Code & AL) == AL)
Code = AL;
switch (Code) {
case EQ: return "eq";
case NE: return "ne";
case CS: return "cs";
case CC: return "cc";
case MI: return "mi";
case PL: return "pl";
case VS: return "vs";
case VC: return "vc";
case HI: return "hi";
case LS: return "ls";
case GE: return "ge";
case LT: return "lt";
case GT: return "gt";
case LE: return "le";
case AL: return "al";
}
llvm_unreachable("Unknown condition code");
}
inline static CondCode getInvertedCondCode(CondCode Code) {
switch (Code) {
default: llvm_unreachable("Unknown condition code");
case EQ: return NE;
case NE: return EQ;
case CS: return CC;
case CC: return CS;
case MI: return PL;
case PL: return MI;
case VS: return VC;
case VC: return VS;
case HI: return LS;
case LS: return HI;
case GE: return LT;
case LT: return GE;
case GT: return LE;
case LE: return GT;
}
}
/// Given a condition code, return NZCV flags that would satisfy that condition.
/// The flag bits are in the format expected by the ccmp instructions.
/// Note that many different flag settings can satisfy a given condition code;
/// this function just returns one of them.
inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
// NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7.
enum { N = 8, Z = 4, C = 2, V = 1 };
switch (Code) {
default: llvm_unreachable("Unknown condition code");
case EQ: return Z; // Z == 1
case NE: return 0; // Z == 0
case CS: return C; // C == 1
case CC: return 0; // C == 0
case MI: return N; // N == 1
case PL: return 0; // N == 0
case VS: return V; // V == 1
case VC: return 0; // V == 0
case HI: return C; // C == 1 && Z == 0
case LS: return 0; // C == 0 || Z == 1
case GE: return 0; // N == V
case LT: return N; // N != V
case GT: return 0; // Z == 0 && N == V
case LE: return Z; // Z == 1 || N != V
}
}
} // end namespace ARM64CC
namespace ARM64SYS {
enum BarrierOption {
InvalidBarrier = 0xff,
OSHLD = 0x1,
OSHST = 0x2,
OSH = 0x3,
NSHLD = 0x5,
NSHST = 0x6,
NSH = 0x7,
ISHLD = 0x9,
ISHST = 0xa,
ISH = 0xb,
LD = 0xd,
ST = 0xe,
SY = 0xf
};
inline static const char *getBarrierOptName(BarrierOption Opt) {
switch (Opt) {
default: return NULL;
case 0x1: return "oshld";
case 0x2: return "oshst";
case 0x3: return "osh";
case 0x5: return "nshld";
case 0x6: return "nshst";
case 0x7: return "nsh";
case 0x9: return "ishld";
case 0xa: return "ishst";
case 0xb: return "ish";
case 0xd: return "ld";
case 0xe: return "st";
case 0xf: return "sy";
}
}
#define A64_SYSREG_ENC(op0,CRn,op2,CRm,op1) ((op0) << 14 | (op1) << 11 | \
(CRn) << 7 | (CRm) << 3 | (op2))
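// Note the argument order is (op0, CRn, op2, CRm, op1), while the packed
// layout is op0[15:14] op1[13:11] CRn[10:7] CRm[6:3] op2[2:0]. For example
// (illustrative), MDCCSR_EL0 below is A64_SYSREG_ENC(2, 0, 0, 1, 3) = 0x9808.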
enum SystemRegister {
InvalidSystemReg = 0,
// Table in section 3.10.3
SPSR_EL1 = 0xc200,
SPSR_svc = SPSR_EL1,
ELR_EL1 = 0xc201,
SP_EL0 = 0xc208,
SPSel = 0xc210,
CurrentEL = 0xc212,
DAIF = 0xda11,
NZCV = 0xda10,
FPCR = 0xda20,
FPSR = 0xda21,
DSPSR = 0xda28,
DLR = 0xda29,
SPSR_EL2 = 0xe200,
SPSR_hyp = SPSR_EL2,
ELR_EL2 = 0xe201,
SP_EL1 = 0xe208,
SPSR_irq = 0xe218,
SPSR_abt = 0xe219,
SPSR_und = 0xe21a,
SPSR_fiq = 0xe21b,
SPSR_EL3 = 0xf200,
ELR_EL3 = 0xf201,
SP_EL2 = 0xf208,
// Table in section 3.10.8
MIDR_EL1 = 0xc000,
CTR_EL0 = 0xd801,
MPIDR_EL1 = 0xc005,
ECOIDR_EL1 = 0xc006,
DCZID_EL0 = 0xd807,
MVFR0_EL1 = 0xc018,
MVFR1_EL1 = 0xc019,
ID_AA64PFR0_EL1 = 0xc020,
ID_AA64PFR1_EL1 = 0xc021,
ID_AA64DFR0_EL1 = 0xc028,
ID_AA64DFR1_EL1 = 0xc029,
ID_AA64ISAR0_EL1 = 0xc030,
ID_AA64ISAR1_EL1 = 0xc031,
ID_AA64MMFR0_EL1 = 0xc038,
ID_AA64MMFR1_EL1 = 0xc039,
CCSIDR_EL1 = 0xc800,
CLIDR_EL1 = 0xc801,
AIDR_EL1 = 0xc807,
CSSELR_EL1 = 0xd000,
VPIDR_EL2 = 0xe000,
VMPIDR_EL2 = 0xe005,
SCTLR_EL1 = 0xc080,
SCTLR_EL2 = 0xe080,
SCTLR_EL3 = 0xf080,
ACTLR_EL1 = 0xc081,
ACTLR_EL2 = 0xe081,
ACTLR_EL3 = 0xf081,
CPACR_EL1 = 0xc082,
CPTR_EL2 = 0xe08a,
CPTR_EL3 = 0xf08a,
SCR_EL3 = 0xf088,
HCR_EL2 = 0xe088,
MDCR_EL2 = 0xe089,
MDCR_EL3 = 0xf099,
HSTR_EL2 = 0xe08b,
HACR_EL2 = 0xe08f,
TTBR0_EL1 = 0xc100,
TTBR1_EL1 = 0xc101,
TTBR0_EL2 = 0xe100,
TTBR0_EL3 = 0xf100,
VTTBR_EL2 = 0xe108,
TCR_EL1 = 0xc102,
TCR_EL2 = 0xe102,
TCR_EL3 = 0xf102,
VTCR_EL2 = 0xe10a,
ADFSR_EL1 = 0xc288,
AIFSR_EL1 = 0xc289,
ADFSR_EL2 = 0xe288,
AIFSR_EL2 = 0xe289,
ADFSR_EL3 = 0xf288,
AIFSR_EL3 = 0xf289,
ESR_EL1 = 0xc290,
ESR_EL2 = 0xe290,
ESR_EL3 = 0xf290,
FAR_EL1 = 0xc300,
FAR_EL2 = 0xe300,
FAR_EL3 = 0xf300,
HPFAR_EL2 = 0xe304,
PAR_EL1 = 0xc3a0,
MAIR_EL1 = 0xc510,
MAIR_EL2 = 0xe510,
MAIR_EL3 = 0xf510,
AMAIR_EL1 = 0xc518,
AMAIR_EL2 = 0xe518,
AMAIR_EL3 = 0xf518,
VBAR_EL1 = 0xc600,
VBAR_EL2 = 0xe600,
VBAR_EL3 = 0xf600,
RVBAR_EL1 = 0xc601,
RVBAR_EL2 = 0xe601,
RVBAR_EL3 = 0xf601,
ISR_EL1 = 0xc608,
CONTEXTIDR_EL1 = 0xc681,
TPIDR_EL0 = 0xde82,
TPIDRRO_EL0 = 0xde83,
TPIDR_EL1 = 0xc684,
TPIDR_EL2 = 0xe682,
TPIDR_EL3 = 0xf682,
TEECR32_EL1 = 0x9000,
CNTFRQ_EL0 = 0xdf00,
CNTPCT_EL0 = 0xdf01,
CNTVCT_EL0 = 0xdf02,
CNTVOFF_EL2 = 0xe703,
CNTKCTL_EL1 = 0xc708,
CNTHCTL_EL2 = 0xe708,
CNTP_TVAL_EL0 = 0xdf10,
CNTP_CTL_EL0 = 0xdf11,
CNTP_CVAL_EL0 = 0xdf12,
CNTV_TVAL_EL0 = 0xdf18,
CNTV_CTL_EL0 = 0xdf19,
CNTV_CVAL_EL0 = 0xdf1a,
CNTHP_TVAL_EL2 = 0xe710,
CNTHP_CTL_EL2 = 0xe711,
CNTHP_CVAL_EL2 = 0xe712,
CNTPS_TVAL_EL1 = 0xff10,
CNTPS_CTL_EL1 = 0xff11,
CNTPS_CVAL_EL1 = 0xff12,
PMEVCNTR0_EL0 = 0xdf40,
PMEVCNTR1_EL0 = 0xdf41,
PMEVCNTR2_EL0 = 0xdf42,
PMEVCNTR3_EL0 = 0xdf43,
PMEVCNTR4_EL0 = 0xdf44,
PMEVCNTR5_EL0 = 0xdf45,
PMEVCNTR6_EL0 = 0xdf46,
PMEVCNTR7_EL0 = 0xdf47,
PMEVCNTR8_EL0 = 0xdf48,
PMEVCNTR9_EL0 = 0xdf49,
PMEVCNTR10_EL0 = 0xdf4a,
PMEVCNTR11_EL0 = 0xdf4b,
PMEVCNTR12_EL0 = 0xdf4c,
PMEVCNTR13_EL0 = 0xdf4d,
PMEVCNTR14_EL0 = 0xdf4e,
PMEVCNTR15_EL0 = 0xdf4f,
PMEVCNTR16_EL0 = 0xdf50,
PMEVCNTR17_EL0 = 0xdf51,
PMEVCNTR18_EL0 = 0xdf52,
PMEVCNTR19_EL0 = 0xdf53,
PMEVCNTR20_EL0 = 0xdf54,
PMEVCNTR21_EL0 = 0xdf55,
PMEVCNTR22_EL0 = 0xdf56,
PMEVCNTR23_EL0 = 0xdf57,
PMEVCNTR24_EL0 = 0xdf58,
PMEVCNTR25_EL0 = 0xdf59,
PMEVCNTR26_EL0 = 0xdf5a,
PMEVCNTR27_EL0 = 0xdf5b,
PMEVCNTR28_EL0 = 0xdf5c,
PMEVCNTR29_EL0 = 0xdf5d,
PMEVCNTR30_EL0 = 0xdf5e,
PMEVTYPER0_EL0 = 0xdf60,
PMEVTYPER1_EL0 = 0xdf61,
PMEVTYPER2_EL0 = 0xdf62,
PMEVTYPER3_EL0 = 0xdf63,
PMEVTYPER4_EL0 = 0xdf64,
PMEVTYPER5_EL0 = 0xdf65,
PMEVTYPER6_EL0 = 0xdf66,
PMEVTYPER7_EL0 = 0xdf67,
PMEVTYPER8_EL0 = 0xdf68,
PMEVTYPER9_EL0 = 0xdf69,
PMEVTYPER10_EL0 = 0xdf6a,
PMEVTYPER11_EL0 = 0xdf6b,
PMEVTYPER12_EL0 = 0xdf6c,
PMEVTYPER13_EL0 = 0xdf6d,
PMEVTYPER14_EL0 = 0xdf6e,
PMEVTYPER15_EL0 = 0xdf6f,
PMEVTYPER16_EL0 = 0xdf70,
PMEVTYPER17_EL0 = 0xdf71,
PMEVTYPER18_EL0 = 0xdf72,
PMEVTYPER19_EL0 = 0xdf73,
PMEVTYPER20_EL0 = 0xdf74,
PMEVTYPER21_EL0 = 0xdf75,
PMEVTYPER22_EL0 = 0xdf76,
PMEVTYPER23_EL0 = 0xdf77,
PMEVTYPER24_EL0 = 0xdf78,
PMEVTYPER25_EL0 = 0xdf79,
PMEVTYPER26_EL0 = 0xdf7a,
PMEVTYPER27_EL0 = 0xdf7b,
PMEVTYPER28_EL0 = 0xdf7c,
PMEVTYPER29_EL0 = 0xdf7d,
PMEVTYPER30_EL0 = 0xdf7e,
PMCCFILTR_EL0 = 0xdf7f,
RMR_EL3 = 0xf602,
RMR_EL2 = 0xd602,
RMR_EL1 = 0xce02,
// Debug Architecture 5.3, Table 17.
MDCCSR_EL0 = A64_SYSREG_ENC(2, 0, 0, 1, 3),
MDCCINT_EL1 = A64_SYSREG_ENC(2, 0, 0, 2, 0),
DBGDTR_EL0 = A64_SYSREG_ENC(2, 0, 0, 4, 3),
DBGDTRRX_EL0 = A64_SYSREG_ENC(2, 0, 0, 5, 3),
DBGDTRTX_EL0 = DBGDTRRX_EL0,
DBGVCR32_EL2 = A64_SYSREG_ENC(2, 0, 0, 7, 4),
OSDTRRX_EL1 = A64_SYSREG_ENC(2, 0, 2, 0, 0),
MDSCR_EL1 = A64_SYSREG_ENC(2, 0, 2, 2, 0),
OSDTRTX_EL1 = A64_SYSREG_ENC(2, 0, 2, 3, 0),
OSECCR_EL11 = A64_SYSREG_ENC(2, 0, 2, 6, 0),
DBGBVR0_EL1 = A64_SYSREG_ENC(2, 0, 4, 0, 0),
DBGBVR1_EL1 = A64_SYSREG_ENC(2, 0, 4, 1, 0),
DBGBVR2_EL1 = A64_SYSREG_ENC(2, 0, 4, 2, 0),
DBGBVR3_EL1 = A64_SYSREG_ENC(2, 0, 4, 3, 0),
DBGBVR4_EL1 = A64_SYSREG_ENC(2, 0, 4, 4, 0),
DBGBVR5_EL1 = A64_SYSREG_ENC(2, 0, 4, 5, 0),
DBGBVR6_EL1 = A64_SYSREG_ENC(2, 0, 4, 6, 0),
DBGBVR7_EL1 = A64_SYSREG_ENC(2, 0, 4, 7, 0),
DBGBVR8_EL1 = A64_SYSREG_ENC(2, 0, 4, 8, 0),
DBGBVR9_EL1 = A64_SYSREG_ENC(2, 0, 4, 9, 0),
DBGBVR10_EL1 = A64_SYSREG_ENC(2, 0, 4, 10, 0),
DBGBVR11_EL1 = A64_SYSREG_ENC(2, 0, 4, 11, 0),
DBGBVR12_EL1 = A64_SYSREG_ENC(2, 0, 4, 12, 0),
DBGBVR13_EL1 = A64_SYSREG_ENC(2, 0, 4, 13, 0),
DBGBVR14_EL1 = A64_SYSREG_ENC(2, 0, 4, 14, 0),
DBGBVR15_EL1 = A64_SYSREG_ENC(2, 0, 4, 15, 0),
DBGBCR0_EL1 = A64_SYSREG_ENC(2, 0, 5, 0, 0),
DBGBCR1_EL1 = A64_SYSREG_ENC(2, 0, 5, 1, 0),
DBGBCR2_EL1 = A64_SYSREG_ENC(2, 0, 5, 2, 0),
DBGBCR3_EL1 = A64_SYSREG_ENC(2, 0, 5, 3, 0),
DBGBCR4_EL1 = A64_SYSREG_ENC(2, 0, 5, 4, 0),
DBGBCR5_EL1 = A64_SYSREG_ENC(2, 0, 5, 5, 0),
DBGBCR6_EL1 = A64_SYSREG_ENC(2, 0, 5, 6, 0),
DBGBCR7_EL1 = A64_SYSREG_ENC(2, 0, 5, 7, 0),
DBGBCR8_EL1 = A64_SYSREG_ENC(2, 0, 5, 8, 0),
DBGBCR9_EL1 = A64_SYSREG_ENC(2, 0, 5, 9, 0),
DBGBCR10_EL1 = A64_SYSREG_ENC(2, 0, 5, 10, 0),
DBGBCR11_EL1 = A64_SYSREG_ENC(2, 0, 5, 11, 0),
DBGBCR12_EL1 = A64_SYSREG_ENC(2, 0, 5, 12, 0),
DBGBCR13_EL1 = A64_SYSREG_ENC(2, 0, 5, 13, 0),
DBGBCR14_EL1 = A64_SYSREG_ENC(2, 0, 5, 14, 0),
DBGBCR15_EL1 = A64_SYSREG_ENC(2, 0, 5, 15, 0),
DBGWVR0_EL1 = A64_SYSREG_ENC(2, 0, 6, 0, 0),
DBGWVR1_EL1 = A64_SYSREG_ENC(2, 0, 6, 1, 0),
DBGWVR2_EL1 = A64_SYSREG_ENC(2, 0, 6, 2, 0),
DBGWVR3_EL1 = A64_SYSREG_ENC(2, 0, 6, 3, 0),
DBGWVR4_EL1 = A64_SYSREG_ENC(2, 0, 6, 4, 0),
DBGWVR5_EL1 = A64_SYSREG_ENC(2, 0, 6, 5, 0),
DBGWVR6_EL1 = A64_SYSREG_ENC(2, 0, 6, 6, 0),
DBGWVR7_EL1 = A64_SYSREG_ENC(2, 0, 6, 7, 0),
DBGWVR8_EL1 = A64_SYSREG_ENC(2, 0, 6, 8, 0),
DBGWVR9_EL1 = A64_SYSREG_ENC(2, 0, 6, 9, 0),
DBGWVR10_EL1 = A64_SYSREG_ENC(2, 0, 6, 10, 0),
DBGWVR11_EL1 = A64_SYSREG_ENC(2, 0, 6, 11, 0),
DBGWVR12_EL1 = A64_SYSREG_ENC(2, 0, 6, 12, 0),
DBGWVR13_EL1 = A64_SYSREG_ENC(2, 0, 6, 13, 0),
DBGWVR14_EL1 = A64_SYSREG_ENC(2, 0, 6, 14, 0),
DBGWVR15_EL1 = A64_SYSREG_ENC(2, 0, 6, 15, 0),
DBGWCR0_EL1 = A64_SYSREG_ENC(2, 0, 7, 0, 0),
DBGWCR1_EL1 = A64_SYSREG_ENC(2, 0, 7, 1, 0),
DBGWCR2_EL1 = A64_SYSREG_ENC(2, 0, 7, 2, 0),
DBGWCR3_EL1 = A64_SYSREG_ENC(2, 0, 7, 3, 0),
DBGWCR4_EL1 = A64_SYSREG_ENC(2, 0, 7, 4, 0),
DBGWCR5_EL1 = A64_SYSREG_ENC(2, 0, 7, 5, 0),
DBGWCR6_EL1 = A64_SYSREG_ENC(2, 0, 7, 6, 0),
DBGWCR7_EL1 = A64_SYSREG_ENC(2, 0, 7, 7, 0),
DBGWCR8_EL1 = A64_SYSREG_ENC(2, 0, 7, 8, 0),
DBGWCR9_EL1 = A64_SYSREG_ENC(2, 0, 7, 9, 0),
DBGWCR10_EL1 = A64_SYSREG_ENC(2, 0, 7, 10, 0),
DBGWCR11_EL1 = A64_SYSREG_ENC(2, 0, 7, 11, 0),
DBGWCR12_EL1 = A64_SYSREG_ENC(2, 0, 7, 12, 0),
DBGWCR13_EL1 = A64_SYSREG_ENC(2, 0, 7, 13, 0),
DBGWCR14_EL1 = A64_SYSREG_ENC(2, 0, 7, 14, 0),
DBGWCR15_EL1 = A64_SYSREG_ENC(2, 0, 7, 15, 0),
MDRAR_EL1 = A64_SYSREG_ENC(2, 1, 0, 0, 0),
OSLAR_EL1 = A64_SYSREG_ENC(2, 1, 4, 0, 0),
OSLSR_EL1 = A64_SYSREG_ENC(2, 1, 4, 1, 0),
OSDLR_EL1 = A64_SYSREG_ENC(2, 1, 4, 3, 0),
DBGPRCR_EL1 = A64_SYSREG_ENC(2, 1, 4, 4, 0),
DBGCLAIMSET_EL1 = A64_SYSREG_ENC(2, 7, 6, 8, 0),
DBGCLAIMCLR_EL1 = A64_SYSREG_ENC(2, 7, 6, 9, 0),
DBGAUTHSTATUS_EL1 = A64_SYSREG_ENC(2, 7, 6, 14, 0),
DBGDEVID2 = A64_SYSREG_ENC(2, 7, 7, 0, 0),
DBGDEVID1 = A64_SYSREG_ENC(2, 7, 7, 1, 0),
DBGDEVID0 = A64_SYSREG_ENC(2, 7, 7, 2, 0),
// The following registers are defined to allow access from AArch64 to
// registers which are only used in the AArch32 architecture.
DACR32_EL2 = 0xe180,
IFSR32_EL2 = 0xe281,
TEEHBR32_EL1 = 0x9080,
SDER32_EL3 = 0xf089,
FPEXC32_EL2 = 0xe298,
// Cyclone specific system registers
CPM_IOACC_CTL_EL3 = 0xff90,
// Architectural system registers
ID_PFR0_EL1 = 0xc008,
ID_PFR1_EL1 = 0xc009,
ID_DFR0_EL1 = 0xc00a,
ID_AFR0_EL1 = 0xc00b,
ID_ISAR0_EL1 = 0xc010,
ID_ISAR1_EL1 = 0xc011,
ID_ISAR2_EL1 = 0xc012,
ID_ISAR3_EL1 = 0xc013,
ID_ISAR4_EL1 = 0xc014,
ID_ISAR5_EL1 = 0xc015,
AFSR1_EL1 = 0xc289, // note same as old AIFSR_EL1
AFSR0_EL1 = 0xc288, // note same as old ADFSR_EL1
REVIDR_EL1 = 0xc006 // note same as old ECOIDR_EL1
};
#undef A64_SYSREG_ENC
static inline const char *getSystemRegisterName(SystemRegister Reg) {
switch(Reg) {
default: return NULL; // Caller is responsible for handling invalid value.
case SPSR_EL1: return "SPSR_EL1";
case ELR_EL1: return "ELR_EL1";
case SP_EL0: return "SP_EL0";
case SPSel: return "SPSel";
case DAIF: return "DAIF";
case CurrentEL: return "CurrentEL";
case NZCV: return "NZCV";
case FPCR: return "FPCR";
case FPSR: return "FPSR";
case DSPSR: return "DSPSR";
case DLR: return "DLR";
case SPSR_EL2: return "SPSR_EL2";
case ELR_EL2: return "ELR_EL2";
case SP_EL1: return "SP_EL1";
case SPSR_irq: return "SPSR_irq";
case SPSR_abt: return "SPSR_abt";
case SPSR_und: return "SPSR_und";
case SPSR_fiq: return "SPSR_fiq";
case SPSR_EL3: return "SPSR_EL3";
case ELR_EL3: return "ELR_EL3";
case SP_EL2: return "SP_EL2";
case MIDR_EL1: return "MIDR_EL1";
case CTR_EL0: return "CTR_EL0";
case MPIDR_EL1: return "MPIDR_EL1";
case DCZID_EL0: return "DCZID_EL0";
case MVFR0_EL1: return "MVFR0_EL1";
case MVFR1_EL1: return "MVFR1_EL1";
case ID_AA64PFR0_EL1: return "ID_AA64PFR0_EL1";
case ID_AA64PFR1_EL1: return "ID_AA64PFR1_EL1";
case ID_AA64DFR0_EL1: return "ID_AA64DFR0_EL1";
case ID_AA64DFR1_EL1: return "ID_AA64DFR1_EL1";
case ID_AA64ISAR0_EL1: return "ID_AA64ISAR0_EL1";
case ID_AA64ISAR1_EL1: return "ID_AA64ISAR1_EL1";
case ID_AA64MMFR0_EL1: return "ID_AA64MMFR0_EL1";
case ID_AA64MMFR1_EL1: return "ID_AA64MMFR1_EL1";
case CCSIDR_EL1: return "CCSIDR_EL1";
case CLIDR_EL1: return "CLIDR_EL1";
case AIDR_EL1: return "AIDR_EL1";
case CSSELR_EL1: return "CSSELR_EL1";
case VPIDR_EL2: return "VPIDR_EL2";
case VMPIDR_EL2: return "VMPIDR_EL2";
case SCTLR_EL1: return "SCTLR_EL1";
case SCTLR_EL2: return "SCTLR_EL2";
case SCTLR_EL3: return "SCTLR_EL3";
case ACTLR_EL1: return "ACTLR_EL1";
case ACTLR_EL2: return "ACTLR_EL2";
case ACTLR_EL3: return "ACTLR_EL3";
case CPACR_EL1: return "CPACR_EL1";
case CPTR_EL2: return "CPTR_EL2";
case CPTR_EL3: return "CPTR_EL3";
case SCR_EL3: return "SCR_EL3";
case HCR_EL2: return "HCR_EL2";
case MDCR_EL2: return "MDCR_EL2";
case MDCR_EL3: return "MDCR_EL3";
case HSTR_EL2: return "HSTR_EL2";
case HACR_EL2: return "HACR_EL2";
case TTBR0_EL1: return "TTBR0_EL1";
case TTBR1_EL1: return "TTBR1_EL1";
case TTBR0_EL2: return "TTBR0_EL2";
case TTBR0_EL3: return "TTBR0_EL3";
case VTTBR_EL2: return "VTTBR_EL2";
case TCR_EL1: return "TCR_EL1";
case TCR_EL2: return "TCR_EL2";
case TCR_EL3: return "TCR_EL3";
case VTCR_EL2: return "VTCR_EL2";
case ADFSR_EL2: return "ADFSR_EL2";
case AIFSR_EL2: return "AIFSR_EL2";
case ADFSR_EL3: return "ADFSR_EL3";
case AIFSR_EL3: return "AIFSR_EL3";
case ESR_EL1: return "ESR_EL1";
case ESR_EL2: return "ESR_EL2";
case ESR_EL3: return "ESR_EL3";
case FAR_EL1: return "FAR_EL1";
case FAR_EL2: return "FAR_EL2";
case FAR_EL3: return "FAR_EL3";
case HPFAR_EL2: return "HPFAR_EL2";
case PAR_EL1: return "PAR_EL1";
case MAIR_EL1: return "MAIR_EL1";
case MAIR_EL2: return "MAIR_EL2";
case MAIR_EL3: return "MAIR_EL3";
case AMAIR_EL1: return "AMAIR_EL1";
case AMAIR_EL2: return "AMAIR_EL2";
case AMAIR_EL3: return "AMAIR_EL3";
case VBAR_EL1: return "VBAR_EL1";
case VBAR_EL2: return "VBAR_EL2";
case VBAR_EL3: return "VBAR_EL3";
case RVBAR_EL1: return "RVBAR_EL1";
case RVBAR_EL2: return "RVBAR_EL2";
case RVBAR_EL3: return "RVBAR_EL3";
case ISR_EL1: return "ISR_EL1";
case CONTEXTIDR_EL1: return "CONTEXTIDR_EL1";
case TPIDR_EL0: return "TPIDR_EL0";
case TPIDRRO_EL0: return "TPIDRRO_EL0";
case TPIDR_EL1: return "TPIDR_EL1";
case TPIDR_EL2: return "TPIDR_EL2";
case TPIDR_EL3: return "TPIDR_EL3";
case TEECR32_EL1: return "TEECR32_EL1";
case CNTFRQ_EL0: return "CNTFRQ_EL0";
case CNTPCT_EL0: return "CNTPCT_EL0";
case CNTVCT_EL0: return "CNTVCT_EL0";
case CNTVOFF_EL2: return "CNTVOFF_EL2";
case CNTKCTL_EL1: return "CNTKCTL_EL1";
case CNTHCTL_EL2: return "CNTHCTL_EL2";
case CNTP_TVAL_EL0: return "CNTP_TVAL_EL0";
case CNTP_CTL_EL0: return "CNTP_CTL_EL0";
case CNTP_CVAL_EL0: return "CNTP_CVAL_EL0";
case CNTV_TVAL_EL0: return "CNTV_TVAL_EL0";
case CNTV_CTL_EL0: return "CNTV_CTL_EL0";
case CNTV_CVAL_EL0: return "CNTV_CVAL_EL0";
case CNTHP_TVAL_EL2: return "CNTHP_TVAL_EL2";
case CNTHP_CTL_EL2: return "CNTHP_CTL_EL2";
case CNTHP_CVAL_EL2: return "CNTHP_CVAL_EL2";
case CNTPS_TVAL_EL1: return "CNTPS_TVAL_EL1";
case CNTPS_CTL_EL1: return "CNTPS_CTL_EL1";
case CNTPS_CVAL_EL1: return "CNTPS_CVAL_EL1";
case DACR32_EL2: return "DACR32_EL2";
case IFSR32_EL2: return "IFSR32_EL2";
case TEEHBR32_EL1: return "TEEHBR32_EL1";
case SDER32_EL3: return "SDER32_EL3";
case FPEXC32_EL2: return "FPEXC32_EL2";
case PMEVCNTR0_EL0: return "PMEVCNTR0_EL0";
case PMEVCNTR1_EL0: return "PMEVCNTR1_EL0";
case PMEVCNTR2_EL0: return "PMEVCNTR2_EL0";
case PMEVCNTR3_EL0: return "PMEVCNTR3_EL0";
case PMEVCNTR4_EL0: return "PMEVCNTR4_EL0";
case PMEVCNTR5_EL0: return "PMEVCNTR5_EL0";
case PMEVCNTR6_EL0: return "PMEVCNTR6_EL0";
case PMEVCNTR7_EL0: return "PMEVCNTR7_EL0";
case PMEVCNTR8_EL0: return "PMEVCNTR8_EL0";
case PMEVCNTR9_EL0: return "PMEVCNTR9_EL0";
case PMEVCNTR10_EL0: return "PMEVCNTR10_EL0";
case PMEVCNTR11_EL0: return "PMEVCNTR11_EL0";
case PMEVCNTR12_EL0: return "PMEVCNTR12_EL0";
case PMEVCNTR13_EL0: return "PMEVCNTR13_EL0";
case PMEVCNTR14_EL0: return "PMEVCNTR14_EL0";
case PMEVCNTR15_EL0: return "PMEVCNTR15_EL0";
case PMEVCNTR16_EL0: return "PMEVCNTR16_EL0";
case PMEVCNTR17_EL0: return "PMEVCNTR17_EL0";
case PMEVCNTR18_EL0: return "PMEVCNTR18_EL0";
case PMEVCNTR19_EL0: return "PMEVCNTR19_EL0";
case PMEVCNTR20_EL0: return "PMEVCNTR20_EL0";
case PMEVCNTR21_EL0: return "PMEVCNTR21_EL0";
case PMEVCNTR22_EL0: return "PMEVCNTR22_EL0";
case PMEVCNTR23_EL0: return "PMEVCNTR23_EL0";
case PMEVCNTR24_EL0: return "PMEVCNTR24_EL0";
case PMEVCNTR25_EL0: return "PMEVCNTR25_EL0";
case PMEVCNTR26_EL0: return "PMEVCNTR26_EL0";
case PMEVCNTR27_EL0: return "PMEVCNTR27_EL0";
case PMEVCNTR28_EL0: return "PMEVCNTR28_EL0";
case PMEVCNTR29_EL0: return "PMEVCNTR29_EL0";
case PMEVCNTR30_EL0: return "PMEVCNTR30_EL0";
case PMEVTYPER0_EL0: return "PMEVTYPER0_EL0";
case PMEVTYPER1_EL0: return "PMEVTYPER1_EL0";
case PMEVTYPER2_EL0: return "PMEVTYPER2_EL0";
case PMEVTYPER3_EL0: return "PMEVTYPER3_EL0";
case PMEVTYPER4_EL0: return "PMEVTYPER4_EL0";
case PMEVTYPER5_EL0: return "PMEVTYPER5_EL0";
case PMEVTYPER6_EL0: return "PMEVTYPER6_EL0";
case PMEVTYPER7_EL0: return "PMEVTYPER7_EL0";
case PMEVTYPER8_EL0: return "PMEVTYPER8_EL0";
case PMEVTYPER9_EL0: return "PMEVTYPER9_EL0";
case PMEVTYPER10_EL0: return "PMEVTYPER10_EL0";
case PMEVTYPER11_EL0: return "PMEVTYPER11_EL0";
case PMEVTYPER12_EL0: return "PMEVTYPER12_EL0";
case PMEVTYPER13_EL0: return "PMEVTYPER13_EL0";
case PMEVTYPER14_EL0: return "PMEVTYPER14_EL0";
case PMEVTYPER15_EL0: return "PMEVTYPER15_EL0";
case PMEVTYPER16_EL0: return "PMEVTYPER16_EL0";
case PMEVTYPER17_EL0: return "PMEVTYPER17_EL0";
case PMEVTYPER18_EL0: return "PMEVTYPER18_EL0";
case PMEVTYPER19_EL0: return "PMEVTYPER19_EL0";
case PMEVTYPER20_EL0: return "PMEVTYPER20_EL0";
case PMEVTYPER21_EL0: return "PMEVTYPER21_EL0";
case PMEVTYPER22_EL0: return "PMEVTYPER22_EL0";
case PMEVTYPER23_EL0: return "PMEVTYPER23_EL0";
case PMEVTYPER24_EL0: return "PMEVTYPER24_EL0";
case PMEVTYPER25_EL0: return "PMEVTYPER25_EL0";
case PMEVTYPER26_EL0: return "PMEVTYPER26_EL0";
case PMEVTYPER27_EL0: return "PMEVTYPER27_EL0";
case PMEVTYPER28_EL0: return "PMEVTYPER28_EL0";
case PMEVTYPER29_EL0: return "PMEVTYPER29_EL0";
case PMEVTYPER30_EL0: return "PMEVTYPER30_EL0";
case PMCCFILTR_EL0: return "PMCCFILTR_EL0";
case RMR_EL3: return "RMR_EL3";
case RMR_EL2: return "RMR_EL2";
case RMR_EL1: return "RMR_EL1";
case CPM_IOACC_CTL_EL3: return "CPM_IOACC_CTL_EL3";
case MDCCSR_EL0: return "MDCCSR_EL0";
case MDCCINT_EL1: return "MDCCINT_EL1";
case DBGDTR_EL0: return "DBGDTR_EL0";
case DBGDTRRX_EL0: return "DBGDTRRX_EL0";
case DBGVCR32_EL2: return "DBGVCR32_EL2";
case OSDTRRX_EL1: return "OSDTRRX_EL1";
case MDSCR_EL1: return "MDSCR_EL1";
case OSDTRTX_EL1: return "OSDTRTX_EL1";
case OSECCR_EL11: return "OSECCR_EL11";
case DBGBVR0_EL1: return "DBGBVR0_EL1";
case DBGBVR1_EL1: return "DBGBVR1_EL1";
case DBGBVR2_EL1: return "DBGBVR2_EL1";
case DBGBVR3_EL1: return "DBGBVR3_EL1";
case DBGBVR4_EL1: return "DBGBVR4_EL1";
case DBGBVR5_EL1: return "DBGBVR5_EL1";
case DBGBVR6_EL1: return "DBGBVR6_EL1";
case DBGBVR7_EL1: return "DBGBVR7_EL1";
case DBGBVR8_EL1: return "DBGBVR8_EL1";
case DBGBVR9_EL1: return "DBGBVR9_EL1";
case DBGBVR10_EL1: return "DBGBVR10_EL1";
case DBGBVR11_EL1: return "DBGBVR11_EL1";
case DBGBVR12_EL1: return "DBGBVR12_EL1";
case DBGBVR13_EL1: return "DBGBVR13_EL1";
case DBGBVR14_EL1: return "DBGBVR14_EL1";
case DBGBVR15_EL1: return "DBGBVR15_EL1";
case DBGBCR0_EL1: return "DBGBCR0_EL1";
case DBGBCR1_EL1: return "DBGBCR1_EL1";
case DBGBCR2_EL1: return "DBGBCR2_EL1";
case DBGBCR3_EL1: return "DBGBCR3_EL1";
case DBGBCR4_EL1: return "DBGBCR4_EL1";
case DBGBCR5_EL1: return "DBGBCR5_EL1";
case DBGBCR6_EL1: return "DBGBCR6_EL1";
case DBGBCR7_EL1: return "DBGBCR7_EL1";
case DBGBCR8_EL1: return "DBGBCR8_EL1";
case DBGBCR9_EL1: return "DBGBCR9_EL1";
case DBGBCR10_EL1: return "DBGBCR10_EL1";
case DBGBCR11_EL1: return "DBGBCR11_EL1";
case DBGBCR12_EL1: return "DBGBCR12_EL1";
case DBGBCR13_EL1: return "DBGBCR13_EL1";
case DBGBCR14_EL1: return "DBGBCR14_EL1";
case DBGBCR15_EL1: return "DBGBCR15_EL1";
case DBGWVR0_EL1: return "DBGWVR0_EL1";
case DBGWVR1_EL1: return "DBGWVR1_EL1";
case DBGWVR2_EL1: return "DBGWVR2_EL1";
case DBGWVR3_EL1: return "DBGWVR3_EL1";
case DBGWVR4_EL1: return "DBGWVR4_EL1";
case DBGWVR5_EL1: return "DBGWVR5_EL1";
case DBGWVR6_EL1: return "DBGWVR6_EL1";
case DBGWVR7_EL1: return "DBGWVR7_EL1";
case DBGWVR8_EL1: return "DBGWVR8_EL1";
case DBGWVR9_EL1: return "DBGWVR9_EL1";
case DBGWVR10_EL1: return "DBGWVR10_EL1";
case DBGWVR11_EL1: return "DBGWVR11_EL1";
case DBGWVR12_EL1: return "DBGWVR12_EL1";
case DBGWVR13_EL1: return "DBGWVR13_EL1";
case DBGWVR14_EL1: return "DBGWVR14_EL1";
case DBGWVR15_EL1: return "DBGWVR15_EL1";
case DBGWCR0_EL1: return "DBGWCR0_EL1";
case DBGWCR1_EL1: return "DBGWCR1_EL1";
case DBGWCR2_EL1: return "DBGWCR2_EL1";
case DBGWCR3_EL1: return "DBGWCR3_EL1";
case DBGWCR4_EL1: return "DBGWCR4_EL1";
case DBGWCR5_EL1: return "DBGWCR5_EL1";
case DBGWCR6_EL1: return "DBGWCR6_EL1";
case DBGWCR7_EL1: return "DBGWCR7_EL1";
case DBGWCR8_EL1: return "DBGWCR8_EL1";
case DBGWCR9_EL1: return "DBGWCR9_EL1";
case DBGWCR10_EL1: return "DBGWCR10_EL1";
case DBGWCR11_EL1: return "DBGWCR11_EL1";
case DBGWCR12_EL1: return "DBGWCR12_EL1";
case DBGWCR13_EL1: return "DBGWCR13_EL1";
case DBGWCR14_EL1: return "DBGWCR14_EL1";
case DBGWCR15_EL1: return "DBGWCR15_EL1";
case MDRAR_EL1: return "MDRAR_EL1";
case OSLAR_EL1: return "OSLAR_EL1";
case OSLSR_EL1: return "OSLSR_EL1";
case OSDLR_EL1: return "OSDLR_EL1";
case DBGPRCR_EL1: return "DBGPRCR_EL1";
case DBGCLAIMSET_EL1: return "DBGCLAIMSET_EL1";
case DBGCLAIMCLR_EL1: return "DBGCLAIMCLR_EL1";
case DBGAUTHSTATUS_EL1: return "DBGAUTHSTATUS_EL1";
case DBGDEVID2: return "DBGDEVID2";
case DBGDEVID1: return "DBGDEVID1";
case DBGDEVID0: return "DBGDEVID0";
case ID_PFR0_EL1: return "ID_PFR0_EL1";
case ID_PFR1_EL1: return "ID_PFR1_EL1";
case ID_DFR0_EL1: return "ID_DFR0_EL1";
case ID_AFR0_EL1: return "ID_AFR0_EL1";
case ID_ISAR0_EL1: return "ID_ISAR0_EL1";
case ID_ISAR1_EL1: return "ID_ISAR1_EL1";
case ID_ISAR2_EL1: return "ID_ISAR2_EL1";
case ID_ISAR3_EL1: return "ID_ISAR3_EL1";
case ID_ISAR4_EL1: return "ID_ISAR4_EL1";
case ID_ISAR5_EL1: return "ID_ISAR5_EL1";
case AFSR1_EL1: return "AFSR1_EL1";
case AFSR0_EL1: return "AFSR0_EL1";
case REVIDR_EL1: return "REVIDR_EL1";
}
}
enum CPSRField {
InvalidCPSRField = 0xff,
cpsr_SPSel = 0x5,
cpsr_DAIFSet = 0x1e,
cpsr_DAIFClr = 0x1f
};
static inline const char *getCPSRFieldName(CPSRField Val) {
switch(Val) {
default: assert(0 && "Invalid system register value!");
case cpsr_SPSel: return "SPSel";
case cpsr_DAIFSet: return "DAIFSet";
case cpsr_DAIFClr: return "DAIFClr";
}
}
} // end namespace ARM64SYS
namespace ARM64II {
/// Target Operand Flag enum.
enum TOF {
//===------------------------------------------------------------------===//
// ARM64 Specific MachineOperand flags.
MO_NO_FLAG,
MO_FRAGMENT = 0x7,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
/// ADRP instruction.
MO_PAGE = 1,
/// MO_PAGEOFF - A symbol operand with this flag represents the offset of
/// that symbol within a 4K page. This offset is added to the page address
/// to produce the complete address.
MO_PAGEOFF = 2,
/// MO_G3 - A symbol operand with this flag (granule 3) represents the high
/// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G3 = 3,
/// MO_G2 - A symbol operand with this flag (granule 2) represents the bits
/// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G2 = 4,
/// MO_G1 - A symbol operand with this flag (granule 1) represents the bits
/// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G1 = 5,
/// MO_G0 - A symbol operand with this flag (granule 0) represents the bits
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
MO_GOT = 8,
/// MO_NC - Indicates that the linker is not expected to check the symbol
/// reference for overflow ("no check"). For example, in an ADRP/ADD pair of
/// relocations the ADRP is usually checked, but the ADD is not.
MO_NC = 0x10,
/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
MO_TLS = 0x20
};
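// For illustration only (example symbol name assumed): a global's address is
// typically materialized with an ADRP/ADD pair, where the ADRP's symbol
// operand carries MO_PAGE and the ADD's carries MO_PAGEOFF, e.g. in Darwin
// assembly syntax:
//   adrp x0, _var@PAGE        ; pc-relative 4K page containing _var
//   add  x0, x0, _var@PAGEOFF ; offset of _var within that page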
} // end namespace ARM64II
} // end namespace llvm
#endif

View File

@ -0,0 +1,237 @@
//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file handles ELF-specific object emission, converting LLVM's internal
// fixups into the appropriate relocations.
//
//===----------------------------------------------------------------------===//
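// For example (illustrative, using a made-up symbol "var"): a GOT-indirect
// access written as
//   adrp x0, :got:var
//   ldr  x0, [x0, :got_lo12:var]
// reaches this writer as fixup_arm64_pcrel_adrp_imm21 with symbol location
// VK_GOT and fixup_arm64_ldst_imm12_scale8 with VK_GOT plus the NC bit, and
// is mapped below to R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC.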
#include "MCTargetDesc/ARM64FixupKinds.h"
#include "MCTargetDesc/ARM64MCExpr.h"
#include "MCTargetDesc/ARM64MCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
namespace {
class ARM64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
ARM64ELFObjectWriter(uint8_t OSABI);
virtual ~ARM64ELFObjectWriter();
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
};
}
ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
/*HasRelocationAddend*/ true) {}
ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {}
unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
ARM64MCExpr::VariantKind RefKind =
static_cast<ARM64MCExpr::VariantKind>(Target.getRefKind());
ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind);
bool IsNC = ARM64MCExpr::isNotChecked(RefKind);
assert((!Target.getSymA() ||
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) &&
"Should only be expression-level modifiers here");
assert((!Target.getSymB() ||
Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) &&
"Should only be expression-level modifiers here");
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
case FK_Data_2:
return ELF::R_AARCH64_PREL16;
case FK_Data_4:
return ELF::R_AARCH64_PREL32;
case FK_Data_8:
return ELF::R_AARCH64_PREL64;
case ARM64::fixup_arm64_pcrel_adr_imm21:
llvm_unreachable("No ELF relocations supported for ADR at the moment");
case ARM64::fixup_arm64_pcrel_adrp_imm21:
if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC)
return ELF::R_AARCH64_ADR_PREL_PG_HI21;
if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC)
return ELF::R_AARCH64_ADR_GOT_PAGE;
if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC)
return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC)
return ELF::R_AARCH64_TLSDESC_ADR_PAGE;
llvm_unreachable("invalid symbol kind for ADRP relocation");
case ARM64::fixup_arm64_pcrel_branch26:
return ELF::R_AARCH64_JUMP26;
case ARM64::fixup_arm64_pcrel_call26:
return ELF::R_AARCH64_CALL26;
case ARM64::fixup_arm64_pcrel_imm19:
return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
default:
llvm_unreachable("Unsupported pc-relative fixup kind");
}
} else {
switch ((unsigned)Fixup.getKind()) {
case FK_Data_2:
return ELF::R_AARCH64_ABS16;
case FK_Data_4:
return ELF::R_AARCH64_ABS32;
case FK_Data_8:
return ELF::R_AARCH64_ABS64;
case ARM64::fixup_arm64_add_imm12:
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC)
return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
return ELF::R_AARCH64_ADD_ABS_LO12_NC;
report_fatal_error("invalid fixup for add (uimm12) instruction");
return 0;
case ARM64::fixup_arm64_ldst_imm12_scale1:
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
return ELF::R_AARCH64_LDST8_ABS_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
report_fatal_error("invalid fixup for 8-bit load/store instruction");
return 0;
case ARM64::fixup_arm64_ldst_imm12_scale2:
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
return ELF::R_AARCH64_LDST16_ABS_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
report_fatal_error("invalid fixup for 16-bit load/store instruction");
return 0;
case ARM64::fixup_arm64_ldst_imm12_scale4:
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
return ELF::R_AARCH64_LDST32_ABS_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
report_fatal_error("invalid fixup for 32-bit load/store instruction");
return 0;
case ARM64::fixup_arm64_ldst_imm12_scale8:
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
return ELF::R_AARCH64_LDST64_ABS_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_GOT && IsNC)
return ELF::R_AARCH64_LD64_GOT_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC)
return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC)
return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
report_fatal_error("invalid fixup for 64-bit load/store instruction");
return 0;
case ARM64::fixup_arm64_ldst_imm12_scale16:
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
return ELF::R_AARCH64_LDST128_ABS_LO12_NC;
report_fatal_error("invalid fixup for 128-bit load/store instruction");
return 0;
case ARM64::fixup_arm64_movw:
if (RefKind == ARM64MCExpr::VK_ABS_G3)
return ELF::R_AARCH64_MOVW_UABS_G3;
if (RefKind == ARM64MCExpr::VK_ABS_G2)
return ELF::R_AARCH64_MOVW_UABS_G2;
if (RefKind == ARM64MCExpr::VK_ABS_G2_NC)
return ELF::R_AARCH64_MOVW_UABS_G2_NC;
if (RefKind == ARM64MCExpr::VK_ABS_G1)
return ELF::R_AARCH64_MOVW_UABS_G1;
if (RefKind == ARM64MCExpr::VK_ABS_G1_NC)
return ELF::R_AARCH64_MOVW_UABS_G1_NC;
if (RefKind == ARM64MCExpr::VK_ABS_G0)
return ELF::R_AARCH64_MOVW_UABS_G0;
if (RefKind == ARM64MCExpr::VK_ABS_G0_NC)
return ELF::R_AARCH64_MOVW_UABS_G0_NC;
if (RefKind == ARM64MCExpr::VK_DTPREL_G2)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
if (RefKind == ARM64MCExpr::VK_DTPREL_G1)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
if (RefKind == ARM64MCExpr::VK_DTPREL_G0)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
if (RefKind == ARM64MCExpr::VK_TPREL_G2)
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
if (RefKind == ARM64MCExpr::VK_TPREL_G1)
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC)
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
if (RefKind == ARM64MCExpr::VK_TPREL_G0)
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC)
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1)
return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC)
return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
report_fatal_error("invalid fixup for movz/movk instruction");
return 0;
case ARM64::fixup_arm64_tlsdesc_call:
return ELF::R_AARCH64_TLSDESC_CALL;
default:
llvm_unreachable("Unknown ELF relocation type");
}
}
llvm_unreachable("Unimplemented fixup -> relocation");
}
MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
}

View File

@ -0,0 +1,158 @@
//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file assembles .s files and emits AArch64 ELF .o object files. It
// differs from the generic ELF streamer in emitting mapping symbols ($x and
// $d) to delimit regions of data and code.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// AArch64 ELF ABI:
/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
///
/// In brief: $x or $d should be emitted at the start of each contiguous region
/// of A64 code or data in a section. In practice, this emission does not rely
/// on explicit assembler directives but on inherent properties of the
/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
/// instruction).
///
/// As a result this system is orthogonal to the DataRegion infrastructure used
/// by MachO. Beware!
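///
/// For illustration, a fragment assembled as
///   add x0, x0, x0   // instruction: "$x" is emitted before it
///   .byte 0x12       // data directive: "$d" is emitted before it
/// ends up with one $x and one $d mapping symbol delimiting the two regions.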
class ARM64ELFStreamer : public MCELFStreamer {
public:
ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
LastEMS(EMS_None) {}
~ARM64ELFStreamer() {}
virtual void ChangeSection(const MCSection *Section,
const MCExpr *Subsection) {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is what DenseMap::lookup
// returns for sections it has not seen (a default-constructed value).
LastMappingSymbols[getPreviousSection().first] = LastEMS;
LastEMS = LastMappingSymbols.lookup(Section);
MCELFStreamer::ChangeSection(Section, Subsection);
}
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
virtual void EmitBytes(StringRef Data) {
EmitDataMappingSymbol();
MCELFStreamer::EmitBytes(Data);
}
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size);
}
private:
enum ElfMappingSymbol {
EMS_None,
EMS_A64,
EMS_Data
};
void EmitDataMappingSymbol() {
if (LastEMS == EMS_Data)
return;
EmitMappingSymbol("$d");
LastEMS = EMS_Data;
}
void EmitA64MappingSymbol() {
if (LastEMS == EMS_A64)
return;
EmitMappingSymbol("$x");
LastEMS = EMS_A64;
}
void EmitMappingSymbol(StringRef Name) {
MCSymbol *Start = getContext().CreateTempSymbol();
EmitLabel(Start);
MCSymbol *Symbol = getContext().GetOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++));
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
Symbol->setSection(*getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
}
int64_t MappingSymbolCounter;
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
ElfMappingSymbol LastEMS;
};
}
namespace llvm {
MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool NoExecStack) {
ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
if (NoExecStack)
S->getAssembler().setNoExecStack(true);
return S;
}
}

View File

@ -0,0 +1,26 @@
//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements ELF streamer information for the ARM64 backend.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_AARCH64_ELF_STREAMER_H
#define LLVM_AARCH64_ELF_STREAMER_H
#include "llvm/MC/MCELFStreamer.h"
namespace llvm {
MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool NoExecStack);
}
#endif // LLVM_AARCH64_ELF_STREAMER_H

View File

@ -0,0 +1,72 @@
//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ARM64FIXUPKINDS_H
#define LLVM_ARM64FIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
namespace llvm {
namespace ARM64 {
enum Fixups {
// fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into
// an ADR instruction.
fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind,
// fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into
// an ADRP instruction.
fixup_arm64_pcrel_adrp_imm21,
// fixup_arm64_add_imm12 - 12-bit fixup for add/sub instructions.
// No alignment adjustment. All value bits are encoded.
fixup_arm64_add_imm12,
// fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and
// store instructions.
fixup_arm64_ldst_imm12_scale1,
fixup_arm64_ldst_imm12_scale2,
fixup_arm64_ldst_imm12_scale4,
fixup_arm64_ldst_imm12_scale8,
fixup_arm64_ldst_imm12_scale16,
// fixup_arm64_movw - Fixup for the 16-bit immediate field of a movz/movk
// instruction; the attached variant kind selects the granule/relocation.
fixup_arm64_movw,
// fixup_arm64_pcrel_branch14 - The high 14 bits of a 16-bit pc-relative
// immediate (test-and-branch target).
fixup_arm64_pcrel_branch14,
// fixup_arm64_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
// immediate, used for conditional-branch and load-literal targets. Unlike
// the ADR/ADRP fixups, it is not used as part of a lo/hi pair and thus
// generates relocations directly when necessary.
fixup_arm64_pcrel_imm19,
// fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
// immediate.
fixup_arm64_pcrel_branch26,
// fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
// immediate. Distinguished from branch26 only on ELF.
fixup_arm64_pcrel_call26,
// fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF
// R_AARCH64_TLSDESC_CALL relocation.
fixup_arm64_tlsdesc_call,
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
} // end namespace ARM64
} // end namespace llvm
#endif

View File

@ -0,0 +1,92 @@
//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the ARM64MCAsmInfo properties.
//
//===----------------------------------------------------------------------===//
#include "ARM64MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
enum AsmWriterVariantTy {
Default = -1,
Generic = 0,
Apple = 1
};
static cl::opt<AsmWriterVariantTy> AsmWriterVariant(
"arm64-neon-syntax", cl::init(Default),
cl::desc("Choose style of NEON code to emit from ARM64 backend:"),
cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"),
clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"),
clEnumValEnd));
ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() {
// We prefer NEON instructions to be printed in the short form.
AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant;
PrivateGlobalPrefix = "L";
SeparatorString = "%%";
CommentString = ";";
PointerSize = CalleeSaveStackSlotSize = 8;
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
UseDataRegionDirectives = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
}
const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol(
const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const {
// On Darwin, we can reference dwarf symbols with foo@GOT-., which
// is an indirect pc-relative reference. The default implementation
// won't reference using the GOT, so we need this target-specific
// version.
MCContext &Context = Streamer.getContext();
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
MCSymbol *PCSym = Context.CreateTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
return MCBinaryExpr::CreateSub(Res, PC, Context);
}
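// For illustration (hypothetical symbol name): for a personality routine such
// as ___gxx_personality_v0 the expression built above prints roughly as
//   ___gxx_personality_v0@GOT - Ltmp0
// where Ltmp0 is the temporary label emitted at the current location.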
ARM64MCAsmInfoELF::ARM64MCAsmInfoELF() {
// We prefer NEON instructions to be printed in the short form.
AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant;
PointerSize = 8;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
CommentString = "//";
PrivateGlobalPrefix = ".L";
Code32Directive = ".code\t32";
Data16bitsDirective = "\t.hword\t";
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = "\t.xword\t";
UseDataRegionDirectives = false;
WeakRefDirective = "\t.weak\t";
HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
}

View File

@ -0,0 +1,36 @@
//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the ARM64MCAsmInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64TARGETASMINFO_H
#define ARM64TARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
namespace llvm {
class Target;
class StringRef;
class MCStreamer;
struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin {
explicit ARM64MCAsmInfoDarwin();
virtual const MCExpr *getExprForPersonalitySymbol(const MCSymbol *Sym,
unsigned Encoding,
MCStreamer &Streamer) const;
};
struct ARM64MCAsmInfoELF : public MCAsmInfo {
explicit ARM64MCAsmInfoELF();
};
} // namespace llvm
#endif

View File

@ -0,0 +1,563 @@
//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM64MCCodeEmitter class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "MCTargetDesc/ARM64BaseInfo.h"
#include "MCTargetDesc/ARM64FixupKinds.h"
#include "MCTargetDesc/ARM64MCExpr.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
STATISTIC(MCNumFixups, "Number of MC fixups created.");
namespace {
class ARM64MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
: Ctx(ctx) {}
~ARM64MCCodeEmitter() {}
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getAMIndexed8OpValue - Return encoding info for base register
/// and 12-bit unsigned immediate attached to a load, store or prfm
/// instruction. If operand requires a relocation, record it and
/// return zero in that part of the encoding.
template <uint32_t FixupKind>
uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
/// target.
uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
/// the 2-bit shift field.
uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getCondBranchTargetOpValue - Return the encoded value for a conditional
/// branch target.
uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
/// branch target.
uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getBranchTargetOpValue - Return the encoded value for an unconditional
/// branch target.
uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getMoveWideImmOpValue - Return the encoded value for the immediate operand
/// of a MOVZ or MOVK instruction.
uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getVecShifterOpValue - Return the encoded value for the vector shifter.
uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getMoveVecShifterOpValue - Return the encoded value for the vector move
/// shifter (MSL).
uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getFixedPointScaleOpValue - Return the encoded value for the
// FP-to-fixed-point scale factor.
uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
/// getSIMDShift64OpValue - Return the encoded value for the
// shift-by-immediate AdvSIMD instructions.
uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
const MCSubtargetInfo &STI) const;
void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; }
void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
// Output the constant in little endian byte order.
for (unsigned i = 0; i != Size; ++i) {
EmitByte(Val & 255, OS);
Val >>= 8;
}
}
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARM64MCCodeEmitter(MCII, STI, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned
ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
assert(MO.isImm() && "did not expect relocated expression");
return static_cast<unsigned>(MO.getImm());
}
template <uint32_t FixupKind>
uint32_t
ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned BaseReg = MI.getOperand(OpIdx).getReg();
BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg);
const MCOperand &MO = MI.getOperand(OpIdx + 1);
uint32_t ImmVal = 0;
if (MO.isImm())
ImmVal = static_cast<uint32_t>(MO.getImm());
else {
assert(MO.isExpr() && "unable to encode load/store imm operand");
MCFixupKind Kind = MCFixupKind(FixupKind);
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
}
return BaseReg | (ImmVal << 5);
}
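// For illustration: with base register x1 (register encoding 1) and an
// immediate of 4, the value returned above is 1 | (4 << 5) = 0x81 -- the base
// register occupies bits 0-4 and the (possibly relocated) immediate the bits
// above them.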
/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
/// target.
uint32_t
ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
// If the destination is an immediate, we have nothing to do.
if (MO.isImm())
return MO.getImm();
assert(MO.isExpr() && "Unexpected ADR target type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MI.getOpcode() == ARM64::ADR
? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21)
: MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21);
Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumFixups;
// All of the information is in the fixup.
return 0;
}
/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
/// the 2-bit shift field. The immediate occupies bits 0-11 and the shift
/// field bits 12-13 of the return value.
uint32_t
ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
// Suboperands are [imm, shifter].
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL &&
"unexpected shift type for add/sub immediate");
unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm());
assert((ShiftVal == 0 || ShiftVal == 12) &&
"unexpected shift value for add/sub immediate");
if (MO.isImm())
return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12));
assert(MO.isExpr() && "Unable to encode MCOperand!");
const MCExpr *Expr = MO.getExpr();
assert(ShiftVal == 0 && "shift not allowed on add/sub immediate with fixup");
// Encode the 12 bits of the fixup.
MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12);
Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumFixups;
return 0;
}
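// For illustration: an (imm, shifter) operand pair of (0x12, LSL #12) is
// encoded above as 0x12 | (1 << 12) = 0x1012, while (0x12, LSL #0) stays
// 0x12.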
/// getCondBranchTargetOpValue - Return the encoded value for a conditional
/// branch target.
uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue(
const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
// If the destination is an immediate, we have nothing to do.
if (MO.isImm())
return MO.getImm();
assert(MO.isExpr() && "Unexpected target type!");
MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_imm19);
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
// All of the information is in the fixup.
return 0;
}
uint32_t
ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
if (MO.isImm())
return MO.getImm();
assert(MO.isExpr() && "Unexpected movz/movk immediate");
Fixups.push_back(MCFixup::Create(
0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc()));
++MCNumFixups;
return 0;
}
/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
/// branch target.
uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue(
const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
// If the destination is an immediate, we have nothing to do.
if (MO.isImm())
return MO.getImm();
assert(MO.isExpr() && "Unexpected ADR target type!");
MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14);
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
// All of the information is in the fixup.
return 0;
}
/// getBranchTargetOpValue - Return the encoded value for an unconditional
/// branch target.
uint32_t
ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
// If the destination is an immediate, we have nothing to do.
if (MO.isImm())
return MO.getImm();
assert(MO.isExpr() && "Unexpected ADR target type!");
MCFixupKind Kind = MI.getOpcode() == ARM64::BL
? MCFixupKind(ARM64::fixup_arm64_pcrel_call26)
: MCFixupKind(ARM64::fixup_arm64_pcrel_branch26);
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
// All of the information is in the fixup.
return 0;
}
/// getVecShifterOpValue - Return the encoded value for the vector shifter:
///
/// 00 -> 0
/// 01 -> 8
/// 10 -> 16
/// 11 -> 24
uint32_t
ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
switch (MO.getImm()) {
default:
break;
case 0:
return 0;
case 8:
return 1;
case 16:
return 2;
case 24:
return 3;
}
assert(false && "Invalid value for vector shift amount!");
return 0;
}
uint32_t
ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
return 64 - (MO.getImm());
}
uint32_t
ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
return 64 - (MO.getImm() | 32);
}
uint32_t
ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
return 32 - (MO.getImm() | 16);
}
uint32_t
ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
return 16 - (MO.getImm() | 8);
}
/// getFixedPointScaleOpValue - Return the encoded value for the
// FP-to-fixed-point scale factor.
uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue(
const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return 64 - MO.getImm();
}
uint32_t
ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return 64 - MO.getImm();
}
uint32_t
ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return 32 - MO.getImm();
}
uint32_t
ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return 16 - MO.getImm();
}
uint32_t
ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return 8 - MO.getImm();
}
uint32_t
ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return MO.getImm() - 64;
}
uint32_t
ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return MO.getImm() - 32;
}
uint32_t
ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return MO.getImm() - 16;
}
uint32_t
ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
return MO.getImm() - 8;
}
/// getMoveVecShifterOpValue - Return the encoded value for the vector move
/// shifter (MSL).
uint32_t
ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() &&
"Expected an immediate value for the move shift amount!");
unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm());
assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!");
return ShiftVal == 8 ? 0 : 1;
}
unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
const MCSubtargetInfo &STI) const {
// If one of the signed fixup kinds is applied to a MOVZ instruction, the
// eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
// job to ensure that any bits possibly affected by this are 0. This means we
// must zero out bit 30 (essentially emitting a MOVN).
MCOperand UImm16MO = MI.getOperand(1);
// Nothing to do if there's no fixup.
if (UImm16MO.isImm())
return EncodedValue;
return EncodedValue & ~(1u << 30);
}
void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
if (MI.getOpcode() == ARM64::TLSDESCCALL) {
// This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
// following (BLR) instruction. It doesn't emit any code itself so it
// doesn't go through the normal TableGenerated channels.
MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call);
Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup));
return;
}
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
EmitConstant(Binary, 4, OS);
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
#include "ARM64GenMCCodeEmitter.inc"

View File

@ -0,0 +1,168 @@
//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the assembly expression modifiers
// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "aarch64symbolrefexpr"
#include "ARM64MCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind,
MCContext &Ctx) {
return new (Ctx) ARM64MCExpr(Expr, Kind);
}
StringRef ARM64MCExpr::getVariantKindName() const {
switch (static_cast<uint32_t>(getKind())) {
case VK_CALL: return "";
case VK_LO12: return ":lo12:";
case VK_ABS_G3: return ":abs_g3:";
case VK_ABS_G2: return ":abs_g2:";
case VK_ABS_G2_NC: return ":abs_g2_nc:";
case VK_ABS_G1: return ":abs_g1:";
case VK_ABS_G1_NC: return ":abs_g1_nc:";
case VK_ABS_G0: return ":abs_g0:";
case VK_ABS_G0_NC: return ":abs_g0_nc:";
case VK_DTPREL_G2: return ":dtprel_g2:";
case VK_DTPREL_G1: return ":dtprel_g1:";
case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:";
case VK_DTPREL_G0: return ":dtprel_g0:";
case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:";
case VK_DTPREL_LO12: return ":dtprel_lo12:";
case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:";
case VK_TPREL_G2: return ":tprel_g2:";
case VK_TPREL_G1: return ":tprel_g1:";
case VK_TPREL_G1_NC: return ":tprel_g1_nc:";
case VK_TPREL_G0: return ":tprel_g0:";
case VK_TPREL_G0_NC: return ":tprel_g0_nc:";
case VK_TPREL_LO12: return ":tprel_lo12:";
case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:";
case VK_TLSDESC_LO12: return ":tlsdesc_lo12:";
case VK_ABS_PAGE: return "";
case VK_GOT_PAGE: return ":got:";
case VK_GOT_LO12: return ":got_lo12:";
case VK_GOTTPREL_PAGE: return ":gottprel:";
case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:";
case VK_GOTTPREL_G1: return ":gottprel_g1:";
case VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:";
case VK_TLSDESC: return "";
case VK_TLSDESC_PAGE: return ":tlsdesc:";
default:
llvm_unreachable("Invalid ELF symbol kind");
}
}
void ARM64MCExpr::PrintImpl(raw_ostream &OS) const {
if (getKind() != VK_NONE)
OS << getVariantKindName();
OS << *Expr;
}
// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
// that method should be made public?
// FIXME: really do the above, now that two backends are using it.
static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
switch (Value->getKind()) {
case MCExpr::Target:
llvm_unreachable("Can't handle nested target expr!");
break;
case MCExpr::Constant:
break;
case MCExpr::Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
AddValueSymbolsImpl(BE->getLHS(), Asm);
AddValueSymbolsImpl(BE->getRHS(), Asm);
break;
}
case MCExpr::SymbolRef:
Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
break;
case MCExpr::Unary:
AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
break;
}
}
void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
AddValueSymbolsImpl(getSubExpr(), Asm);
}
const MCSection *ARM64MCExpr::FindAssociatedSection() const {
llvm_unreachable("FIXME: what goes here?");
}
bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const {
if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout))
return false;
Res =
MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
return true;
}
static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
switch (Expr->getKind()) {
case MCExpr::Target:
llvm_unreachable("Can't handle nested target expression");
break;
case MCExpr::Constant:
break;
case MCExpr::Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
break;
}
case MCExpr::SymbolRef: {
// We're known to be under a TLS fixup, so any symbol should be
// modified. There should be only one.
const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
MCELF::SetType(SD, ELF::STT_TLS);
break;
}
case MCExpr::Unary:
fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
break;
}
}
void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
switch (getSymbolLoc(Kind)) {
default:
return;
case VK_DTPREL:
case VK_GOTTPREL:
case VK_TPREL:
case VK_TLSDESC:
break;
}
fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}

View File

@ -0,0 +1,162 @@
//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes ARM64-specific MCExprs, used for modifiers like
// ":lo12:" or ":gottprel_g1:".
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ARM64MCEXPR_H
#define LLVM_ARM64MCEXPR_H
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class ARM64MCExpr : public MCTargetExpr {
public:
enum VariantKind {
VK_NONE = 0x000,
// Symbol locations specifying (roughly speaking) what calculation should be
// performed to construct the final address for the relocated
// symbol. E.g. direct, via the GOT, ...
VK_ABS = 0x001,
VK_SABS = 0x002,
VK_GOT = 0x003,
VK_DTPREL = 0x004,
VK_GOTTPREL = 0x005,
VK_TPREL = 0x006,
VK_TLSDESC = 0x007,
VK_SymLocBits = 0x00f,
// Variants specifying which part of the final address calculation is
// used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a
// MOVZ/MOVK.
VK_PAGE = 0x010,
VK_PAGEOFF = 0x020,
VK_G0 = 0x030,
VK_G1 = 0x040,
VK_G2 = 0x050,
VK_G3 = 0x060,
VK_AddressFragBits = 0x0f0,
// Whether the final relocation is a checked one (where a linker should
// perform a range-check on the final address) or not. Note that this field
// is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12:
// on its own is a non-checked relocation. We side with ELF on being
// explicit about this!
VK_NC = 0x100,
// Convenience definitions for referring to specific textual representations
// of relocation specifiers. Note that this means the "_NC" is sometimes
// omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC
// since a user would write ":lo12:").
VK_CALL = VK_ABS,
VK_ABS_PAGE = VK_ABS | VK_PAGE,
VK_ABS_G3 = VK_ABS | VK_G3,
VK_ABS_G2 = VK_ABS | VK_G2,
VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC,
VK_ABS_G1 = VK_ABS | VK_G1,
VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC,
VK_ABS_G0 = VK_ABS | VK_G0,
VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC,
VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC,
VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC,
VK_GOT_PAGE = VK_GOT | VK_PAGE,
VK_DTPREL_G2 = VK_DTPREL | VK_G2,
VK_DTPREL_G1 = VK_DTPREL | VK_G1,
VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC,
VK_DTPREL_G0 = VK_DTPREL | VK_G0,
VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC,
VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF,
VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC,
VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE,
VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC,
VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1,
VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC,
VK_TPREL_G2 = VK_TPREL | VK_G2,
VK_TPREL_G1 = VK_TPREL | VK_G1,
VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC,
VK_TPREL_G0 = VK_TPREL | VK_G0,
VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC,
VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF,
VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC,
VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC,
VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE,
VK_INVALID = 0xfff
};
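// For illustration: a composite kind decomposes into the three fields above,
// e.g. VK_DTPREL_LO12_NC == VK_DTPREL | VK_PAGEOFF | VK_NC, so that
// getSymbolLoc() yields VK_DTPREL, getAddressFrag() yields VK_PAGEOFF, and
// isNotChecked() is true.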
private:
const MCExpr *Expr;
const VariantKind Kind;
explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind)
: Expr(Expr), Kind(Kind) {}
public:
/// @name Construction
/// @{
static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind,
MCContext &Ctx);
/// @}
/// @name Accessors
/// @{
/// Get the kind of this expression.
VariantKind getKind() const { return static_cast<VariantKind>(Kind); }
/// Get the expression this modifier applies to.
const MCExpr *getSubExpr() const { return Expr; }
/// @}
/// @name VariantKind information extractors.
/// @{
static VariantKind getSymbolLoc(VariantKind Kind) {
return static_cast<VariantKind>(Kind & VK_SymLocBits);
}
static VariantKind getAddressFrag(VariantKind Kind) {
return static_cast<VariantKind>(Kind & VK_AddressFragBits);
}
static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; }
/// @}
/// Convert the variant kind into an ELF-appropriate modifier
/// (e.g. ":got:", ":lo12:").
StringRef getVariantKindName() const;
void PrintImpl(raw_ostream &OS) const;
void AddValueSymbols(MCAssembler *) const;
const MCSection *FindAssociatedSection() const;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const;
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
static bool classof(const ARM64MCExpr *) { return true; }
};
} // end namespace llvm
#endif

View File

@ -0,0 +1,167 @@
//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides ARM64 specific target descriptions.
//
//===----------------------------------------------------------------------===//
#include "ARM64MCTargetDesc.h"
#include "ARM64ELFStreamer.h"
#include "ARM64MCAsmInfo.h"
#include "InstPrinter/ARM64InstPrinter.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "ARM64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "ARM64GenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "ARM64GenRegisterInfo.inc"
using namespace llvm;
static MCInstrInfo *createARM64MCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitARM64MCInstrInfo(X);
return X;
}
static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitARM64MCSubtargetInfo(X, TT, CPU, FS);
return X;
}
static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitARM64MCRegisterInfo(X, ARM64::LR);
return X;
}
static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI,
StringRef TT) {
Triple TheTriple(TT);
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
MAI = new ARM64MCAsmInfoDarwin();
else {
assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
MAI = new ARM64MCAsmInfoELF();
}
// Initial state of the frame pointer is SP.
unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true);
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
static MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
Triple TheTriple(TT);
assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) &&
"Only expect Darwin and ELF targets");
if (CM == CodeModel::Default)
CM = CodeModel::Small;
// The default MCJIT memory managers make no guarantees about where they can
// find an executable page; JITed code needs to be able to refer to globals
// no matter how far away they are.
else if (CM == CodeModel::JITDefault)
CM = CodeModel::Large;
else if (CM != CodeModel::Small && CM != CodeModel::Large)
report_fatal_error("Only small and large code models are allowed on ARM64");
// ARM64 Darwin is always PIC.
if (TheTriple.isOSDarwin())
RM = Reloc::PIC_;
// On ELF platforms the default static relocation model has a smart enough
// linker to cope with referencing external symbols defined in a shared
// library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
RM = Reloc::Static;
MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createARM64MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
return new ARM64InstPrinter(MAI, MII, MRI, STI);
if (SyntaxVariant == 1)
return new ARM64AppleInstPrinter(MAI, MII, MRI, STI);
return 0;
}
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
const MCSubtargetInfo &STI, bool RelaxAll,
bool NoExecStack) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
/*LabelSections*/ true);
return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
}
// Force static initialization.
extern "C" void LLVMInitializeARM64TargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(TheARM64Target, createARM64MCAsmInfo);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
createARM64MCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheARM64Target, createARM64MCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheARM64Target, createARM64MCRegisterInfo);
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
createARM64MCSubtargetInfo);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheARM64Target, createARM64AsmBackend);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
createARM64MCCodeEmitter);
// Register the object streamer.
TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
createARM64MCInstPrinter);
}
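
As a rough illustration of what these registrations buy a client, here is a minimal sketch (not part of this commit) of a standalone program that reaches the ARM64 MC components through the TargetRegistry. The extern "C" declaration of LLVMInitializeARM64TargetInfo, the choice of triple string, and the main() harness are assumptions made for the example.

#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstdio>
#include <string>

// Assumed to be provided by the ARM64 TargetInfo library; it registers
// TheARM64Target itself so lookupTarget can find it.
extern "C" void LLVMInitializeARM64TargetInfo();
extern "C" void LLVMInitializeARM64TargetMC(); // defined above

int main() {
  LLVMInitializeARM64TargetInfo();
  LLVMInitializeARM64TargetMC();

  std::string Err;
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget("arm64-apple-ios", Err);
  if (!T) {
    std::fprintf(stderr, "lookup failed: %s\n", Err.c_str());
    return 1;
  }

  // Routed to the createARM64MCRegisterInfo factory registered above.
  llvm::MCRegisterInfo *MRI = T->createMCRegInfo("arm64-apple-ios");
  std::printf("%s has %u registers\n", T->getName(), MRI->getNumRegs());
  delete MRI;
  return 0;
}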

View File

@ -0,0 +1,62 @@
//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides ARM64-specific target descriptions.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64MCTARGETDESC_H
#define ARM64MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCRegisterInfo;
class MCObjectWriter;
class MCSubtargetInfo;
class StringRef;
class Target;
class raw_ostream;
extern Target TheARM64Target;
MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI);
MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
uint32_t CPUSubtype);
} // End llvm namespace
// Defines symbolic names for ARM64 registers. This defines a mapping from
// register name to register number.
//
#define GET_REGINFO_ENUM
#include "ARM64GenRegisterInfo.inc"
// Defines symbolic names for the ARM64 instructions.
//
#define GET_INSTRINFO_ENUM
#include "ARM64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
#include "ARM64GenSubtargetInfo.inc"
#endif
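
A brief, hypothetical usage sketch: once GET_REGINFO_ENUM has pulled in ARM64GenRegisterInfo.inc, backend code refers to registers through the symbolic names this header exposes (ARM64::SP, ARM64::LR, and so on). The helper below is illustrative only; it mirrors the DWARF-number query used when the initial CFA state is seeded in ARM64MCTargetDesc.cpp.

#include "MCTargetDesc/ARM64MCTargetDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;

// Illustrative only: look up the (EH) DWARF register number of the ARM64
// stack pointer, the same query made when the initial frame state is set up.
static int dwarfNumberOfSP(const MCRegisterInfo &MRI) {
  // ARM64::SP comes from ARM64GenRegisterInfo.inc via GET_REGINFO_ENUM.
  return MRI.getDwarfRegNum(ARM64::SP, /*isEH=*/true);
}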

View File

@ -0,0 +1,396 @@
//===-- ARM64MachObjectWriter.cpp - ARM64 Mach Object Writer --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARM64FixupKinds.h"
#include "MCTargetDesc/ARM64MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
namespace {
class ARM64MachObjectWriter : public MCMachObjectTargetWriter {
bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType,
const MCSymbolRefExpr *Sym,
unsigned &Log2Size, const MCAssembler &Asm);
public:
ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
: MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue);
};
}
bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo(
const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
unsigned &Log2Size, const MCAssembler &Asm) {
RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED);
Log2Size = ~0U;
switch ((unsigned)Fixup.getKind()) {
default:
return false;
case FK_Data_1:
Log2Size = llvm::Log2_32(1);
return true;
case FK_Data_2:
Log2Size = llvm::Log2_32(2);
return true;
case FK_Data_4:
Log2Size = llvm::Log2_32(4);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
case FK_Data_8:
Log2Size = llvm::Log2_32(8);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
case ARM64::fixup_arm64_add_imm12:
case ARM64::fixup_arm64_ldst_imm12_scale1:
case ARM64::fixup_arm64_ldst_imm12_scale2:
case ARM64::fixup_arm64_ldst_imm12_scale4:
case ARM64::fixup_arm64_ldst_imm12_scale8:
case ARM64::fixup_arm64_ldst_imm12_scale16:
Log2Size = llvm::Log2_32(4);
switch (Sym->getKind()) {
default:
assert(0 && "Unexpected symbol reference variant kind!");
case MCSymbolRefExpr::VK_PAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
return true;
case MCSymbolRefExpr::VK_GOTPAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12);
return true;
case MCSymbolRefExpr::VK_TLVPPAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12);
return true;
}
case ARM64::fixup_arm64_pcrel_adrp_imm21:
Log2Size = llvm::Log2_32(4);
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
default:
Asm.getContext().FatalError(Fixup.getLoc(),
"ADR/ADRP relocations must be GOT relative");
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
return true;
case MCSymbolRefExpr::VK_GOTPAGE:
RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGE21);
return true;
case MCSymbolRefExpr::VK_TLVPPAGE:
RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGE21);
return true;
}
return true;
case ARM64::fixup_arm64_pcrel_branch26:
case ARM64::fixup_arm64_pcrel_call26:
Log2Size = llvm::Log2_32(4);
RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26);
return true;
}
}
void ARM64MachObjectWriter::RecordRelocation(
MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment);
unsigned Log2Size = 0;
int64_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
unsigned Kind = Fixup.getKind();
FixupOffset += Fixup.getOffset();
// ARM64 pcrel relocation addends do not include the section offset.
if (IsPCRel)
FixedValue += FixupOffset;
// ADRP fixups use relocations for the whole symbol value and only
// put the addend in the instruction itself. Clear out any value the
// generic code figured out from the symbol definition.
if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21 ||
Kind == ARM64::fixup_arm64_pcrel_imm19)
FixedValue = 0;
// imm19 relocations are for conditional branches, which require
// assembler local symbols. If we got here, that's not what we have,
// so complain loudly.
if (Kind == ARM64::fixup_arm64_pcrel_imm19) {
Asm.getContext().FatalError(Fixup.getLoc(),
"conditional branch requires assembler-local"
" label. '" +
Target.getSymA()->getSymbol().getName() +
"' is external.");
return;
}
// 14-bit branch relocations should only target internal labels, and so
// should never get here.
if (Kind == ARM64::fixup_arm64_pcrel_branch14) {
Asm.getContext().FatalError(Fixup.getLoc(),
"Invalid relocation on conditional branch!");
return;
}
if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
Asm)) {
Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!");
return;
}
Value = Target.getConstant();
if (Target.isAbsolute()) { // constant
// FIXME: Should this always be extern?
// SymbolNum of 0 indicates the absolute section.
Type = MachO::ARM64_RELOC_UNSIGNED;
Index = 0;
if (IsPCRel) {
IsExtern = 1;
Asm.getContext().FatalError(Fixup.getLoc(),
"PC relative absolute relocation!");
// FIXME: x86_64 sets the type to a branch reloc here. Should we do
// something similar?
}
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
const MCSymbol *B = &Target.getSymB()->getSymbol();
MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
// Check for "_foo@got - .", which comes through here as:
// Ltmp0:
// ... _foo@got - Ltmp0
if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT &&
Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None &&
Layout.getSymbolOffset(&B_SD) ==
Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
// SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
Index = A_Base->getIndex();
IsExtern = 1;
Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
IsPCRel = 1;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
// Otherwise, neither symbol can be modified.
Asm.getContext().FatalError(Fixup.getLoc(),
"unsupported relocation of modified symbol");
// We don't support PCrel relocations of differences.
if (IsPCRel)
Asm.getContext().FatalError(Fixup.getLoc(),
"unsupported pc-relative relocation of "
"difference");
// ARM64 always uses external relocations. If there is no symbol to use as
// a base address (a local symbol with no preceding non-local symbol),
// error out.
//
// FIXME: We should probably just synthesize an external symbol and use
// that.
if (!A_Base)
Asm.getContext().FatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + A->getName() +
"'. Must have non-local symbol earlier in section.");
if (!B_Base)
Asm.getContext().FatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + B->getName() +
"'. Must have non-local symbol earlier in section.");
if (A_Base == B_Base && A_Base)
Asm.getContext().FatalError(Fixup.getLoc(),
"unsupported relocation with identical base");
Value += (A_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
&A_SD, Layout)) -
(A_Base == NULL || A_Base->getFragment() == NULL
? 0
: Writer->getSymbolAddress(A_Base, Layout));
Value -= (B_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
&B_SD, Layout)) -
(B_Base == NULL || B_Base->getFragment() == NULL
? 0
: Writer->getSymbolAddress(B_Base, Layout));
Index = A_Base->getIndex();
IsExtern = 1;
Type = MachO::ARM64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
Index = B_Base->getIndex();
IsExtern = 1;
Type = MachO::ARM64_RELOC_SUBTRACTOR;
} else { // A + constant
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
MCSymbolData &SD = Asm.getSymbolData(*Symbol);
const MCSymbolData *Base = Asm.getAtom(&SD);
const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
Fragment->getParent()->getSection());
// If the symbol is a variable and we weren't able to get a Base for it
// (i.e., it's not in the symbol table associated with a section), resolve
// the relocation based on its expansion instead.
if (Symbol->isVariable() && !Base) {
// If the evaluation is an absolute value, just use that directly
// to keep things easy.
int64_t Res;
if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
}
// FIXME: Will the Target we already have ever have any data in it
// we need to preserve and merge with the new Target? How about
// the FixedValue?
if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout))
Asm.getContext().FatalError(Fixup.getLoc(),
"unable to resolve variable '" +
Symbol->getName() + "'");
return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);
}
// Relocations inside debug sections always use local relocations when
// possible. This seems to be done because the debugger doesn't fully
// understand relocation entries and expects to find values that
// have already been fixed up.
if (Symbol->isInSection()) {
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
Base = 0;
}
// ARM64 uses external relocations as much as possible. For debug sections,
// and for pointer-sized relocations (.quad), we allow section relocations.
// It's code sections that run into trouble.
if (Base) {
Index = Base->getIndex();
IsExtern = 1;
// Add the local offset, if needed.
if (Base != &SD)
Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
} else if (Symbol->isInSection()) {
// Pointer-sized relocations can use a local relocation. Otherwise,
// we have to be in a debug info section.
if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
Asm.getContext().FatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +
"'. Must have non-local symbol earlier in section.");
// Adjust the relocation to be section-relative.
// The index is the section ordinal (1-based).
const MCSectionData &SymSD =
Asm.getSectionData(SD.getSymbol().getSection());
Index = SymSD.getOrdinal() + 1;
IsExtern = 0;
Value += Writer->getSymbolAddress(&SD, Layout);
if (IsPCRel)
Value -= Writer->getFragmentAddress(Fragment, Layout) +
Fixup.getOffset() + (1 << Log2Size);
} else {
// Resolve constant variables.
if (SD.getSymbol().isVariable()) {
int64_t Res;
if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
}
}
Asm.getContext().FatalError(Fixup.getLoc(),
"unsupported relocation of variable '" +
Symbol->getName() + "'");
}
}
// If the relocation kind is Branch26, Page21, or Pageoff12, any addend
// is represented via an Addend relocation, not encoded directly into
// the instruction.
if ((Type == MachO::ARM64_RELOC_BRANCH26 ||
Type == MachO::ARM64_RELOC_PAGE21 ||
Type == MachO::ARM64_RELOC_PAGEOFF12) &&
Value) {
assert((Value & 0xff000000) == 0 && "Addend relocation out of range!");
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
// Now set up the Addend relocation.
Type = MachO::ARM64_RELOC_ADDEND;
Index = Value;
IsPCRel = 0;
Log2Size = 2;
IsExtern = 0;
// Put zero into the instruction itself. The addend is in the relocation.
Value = 0;
}
// If there's any addend left to handle, encode it in the instruction.
FixedValue = Value;
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
(IsExtern << 27) | (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}
MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype),
OS, /*IsLittleEndian=*/true);
}
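
The four addRelocation call sites above all hand-pack MachO::any_relocation_info with the same bit layout. Below is a small stand-alone sketch of that packing, purely to make the field widths explicit; the helper name and the example call are not part of the backend.

#include "llvm/Support/MachO.h"
#include <cstdint>

// r_word1 layout (little-endian hosts): r_symbolnum:24, r_pcrel:1,
// r_length:2, r_extern:1, r_type:4 -- matching <mach-o/reloc.h>.
static llvm::MachO::any_relocation_info
packARM64Reloc(uint32_t FixupOffset, unsigned Index, bool IsPCRel,
               unsigned Log2Size, bool IsExtern, unsigned Type) {
  llvm::MachO::any_relocation_info MRE;
  MRE.r_word0 = FixupOffset;
  MRE.r_word1 = ((Index << 0) | (unsigned(IsPCRel) << 24) |
                 (Log2Size << 25) | (unsigned(IsExtern) << 27) | (Type << 28));
  return MRE;
}

// For example, a BRANCH26 entry against external symbol number 7:
//   packARM64Reloc(Offset, 7, /*IsPCRel=*/true, /*Log2Size=*/2,
//                  /*IsExtern=*/true, llvm::MachO::ARM64_RELOC_BRANCH26);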

View File

@ -0,0 +1,14 @@
add_llvm_library(LLVMARM64Desc
ARM64AsmBackend.cpp
ARM64ELFObjectWriter.cpp
ARM64ELFStreamer.cpp
ARM64MCAsmInfo.cpp
ARM64MCCodeEmitter.cpp
ARM64MCExpr.cpp
ARM64MCTargetDesc.cpp
ARM64MachObjectWriter.cpp
)
add_dependencies(LLVMARM64Desc ARM64CommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)

View File

@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = ARM64Desc
parent = ARM64
required_libraries = ARM64AsmPrinter ARM64Info MC Support
add_to_library_groups = ARM64

View File

@ -0,0 +1,16 @@
##===- lib/Target/ARM64/MCTargetDesc/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMARM64Desc
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

lib/Target/ARM64/Makefile
View File

@ -0,0 +1,25 @@
##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
LIBRARYNAME = LLVMARM64CodeGen
TARGET = ARM64
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \
ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \
ARM64GenDAGISel.inc \
ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \
ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \
ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \
ARM64GenMCPseudoLowering.inc
DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc
include $(LEVEL)/Makefile.common

Some files were not shown because too many files have changed in this diff.