ARM64: initial backend import
This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205090 91177308-0d34-0410-b5e6-96231b3b80d8
parent 69bd9577fc
commit 7b837d8c75
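The patch introduces a distinct arm64 enumerator in llvm::Triple, and target-independent code selects the new backend by checking for it, exactly as the LTOCodeGenerator.cpp and LTOModule.cpp hunks later in this diff do when choosing a default CPU. A minimal sketch of that pattern (the helper function name is illustrative, not from the patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"

// Sketch: pick a default CPU the way the LTO hunks in this commit do.
static llvm::StringRef defaultCPUFor(const llvm::Triple &T) {
  if (T.getArch() == llvm::Triple::x86_64)
    return "core2";
  if (T.getArch() == llvm::Triple::x86)
    return "yonah";
  if (T.getArch() == llvm::Triple::arm64) // enumerator added by this commit
    return "cyclone";
  return "";
}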
@@ -128,6 +128,7 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name

set(LLVM_ALL_TARGETS
  AArch64
  ARM64
  ARM
  CppBackend
  Hexagon
@@ -143,7 +144,7 @@ set(LLVM_ALL_TARGETS
  )

# List of targets with JIT support:
set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ)
set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM64 ARM Mips SystemZ)

set(LLVM_TARGETS_TO_BUILD "all"
  CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
@@ -419,6 +419,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
  amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
  sparc*-*) llvm_cv_target_arch="Sparc" ;;
  powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
  arm64*-*) llvm_cv_target_arch="ARM64" ;;
  arm*-*) llvm_cv_target_arch="ARM" ;;
  aarch64*-*) llvm_cv_target_arch="AArch64" ;;
  mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
@@ -454,6 +455,7 @@ case $host in
  amd64-* | x86_64-*) host_arch="x86_64" ;;
  sparc*-*) host_arch="Sparc" ;;
  powerpc*-*) host_arch="PowerPC" ;;
  arm64*-*) host_arch="ARM64" ;;
  arm*-*) host_arch="ARM" ;;
  aarch64*-*) host_arch="AArch64" ;;
  mips-* | mips64-*) host_arch="Mips" ;;
@@ -795,7 +797,7 @@ else
  esac
fi

TARGETS_WITH_JIT="AArch64 ARM Mips PowerPC SystemZ X86"
TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT)

dnl Allow enablement of building and installing docs
@@ -948,14 +950,14 @@ if test "$llvm_cv_enable_crash_overrides" = "yes" ; then
fi

dnl List all possible targets
ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
AC_SUBST(ALL_TARGETS,$ALL_TARGETS)

dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
  [Build specific host targets: all or target1,target2,... Valid targets are:
  host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
  host, x86, x86_64, sparc, powerpc, arm64, arm, aarch64, mips, hexagon,
  xcore, msp430, nvptx, systemz, r600, and cpp (default=all)]),,
  enableval=all)
if test "$enableval" = host-only ; then
@@ -970,6 +972,7 @@ case "$enableval" in
  sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
  powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
  aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
  arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
  arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
  mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
  mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
@@ -366,6 +366,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
  set(LLVM_NATIVE_ARCH PowerPC)
elseif (LLVM_NATIVE_ARCH MATCHES "aarch64")
  set(LLVM_NATIVE_ARCH AArch64)
elseif (LLVM_NATIVE_ARCH MATCHES "arm64")
  set(LLVM_NATIVE_ARCH ARM64)
elseif (LLVM_NATIVE_ARCH MATCHES "arm")
  set(LLVM_NATIVE_ARCH ARM)
elseif (LLVM_NATIVE_ARCH MATCHES "mips")
configure (vendored, 13 changed lines)
@@ -1447,9 +1447,9 @@ Optional Features:
                          Enable crash handling overrides (default is YES)
  --enable-targets        Build specific host targets: all or
                          target1,target2,... Valid targets are: host, x86,
                          x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
                          xcore, msp430, nvptx, systemz, r600, and cpp
                          (default=all)
                          x86_64, sparc, powerpc, arm64, arm, aarch64, mips,
                          hexagon, xcore, msp430, nvptx, systemz, r600, and
                          cpp (default=all)
  --enable-experimental-targets
                          Build experimental host targets: disable or
                          target1,target2,... (default=disable)
@@ -4151,6 +4151,7 @@ else
  amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
  sparc*-*) llvm_cv_target_arch="Sparc" ;;
  powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
  arm64*-*) llvm_cv_target_arch="ARM64" ;;
  arm*-*) llvm_cv_target_arch="ARM" ;;
  aarch64*-*) llvm_cv_target_arch="AArch64" ;;
  mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
@@ -4187,6 +4188,7 @@ case $host in
  amd64-* | x86_64-*) host_arch="x86_64" ;;
  sparc*-*) host_arch="Sparc" ;;
  powerpc*-*) host_arch="PowerPC" ;;
  arm64*-*) host_arch="ARM64" ;;
  arm*-*) host_arch="ARM" ;;
  aarch64*-*) host_arch="AArch64" ;;
  mips-* | mips64-*) host_arch="Mips" ;;
@@ -5120,7 +5122,7 @@ else
  esac
fi

TARGETS_WITH_JIT="AArch64 ARM Mips PowerPC SystemZ X86"
TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
TARGETS_WITH_JIT=$TARGETS_WITH_JIT


@@ -5357,7 +5359,7 @@ _ACEOF

fi

ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
ALL_TARGETS=$ALL_TARGETS


@@ -5381,6 +5383,7 @@ case "$enableval" in
  sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
  powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
  aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
  arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
  arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
  mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
  mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
@@ -95,6 +95,16 @@ struct LLVMOpInfo1 {
#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */
#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */

/**
 * The ARM64 target VariantKinds.
 */
#define LLVMDisassembler_VariantKind_ARM64_PAGE 1 /* @page */
#define LLVMDisassembler_VariantKind_ARM64_PAGEOFF 2 /* @pageoff */
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGE 3 /* @gotpage */
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF 4 /* @gotpageoff */
#define LLVMDisassembler_VariantKind_ARM64_TLVP 5 /* @tvlppage */
#define LLVMDisassembler_VariantKind_ARM64_TLVOFF 6 /* @tvlppageoff */

/**
 * The type for the symbol lookup function. This may be called by the
 * disassembler for things like adding a comment for a PC plus a constant
@@ -123,6 +133,17 @@ typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
/* The input reference is from a PC relative load instruction. */
#define LLVMDisassembler_ReferenceType_In_PCrel_Load 2

/* The input reference is from an ARM64::ADRP instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_ADRP 0x100000001
/* The input reference is from an ARM64::ADDXri instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_ADDXri 0x100000002
/* The input reference is from an ARM64::LDRXui instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXui 0x100000003
/* The input reference is from an ARM64::LDRXl instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXl 0x100000004
/* The input reference is from an ARM64::ADR instruction. */
#define LLVMDisassembler_ReferenceType_In_ARM64_ADR 0x100000005

/* The output reference is to as symbol stub. */
#define LLVMDisassembler_ReferenceType_Out_SymbolStub 1
/* The output reference is to a symbol address in a literal pool. */
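The new reference-type constants are passed to the disassembler's symbol-lookup callback so a client can annotate ARM64 address-materialization sequences (ADRP/ADD/LDR pairs). A minimal sketch of such a callback, assuming the LLVMSymbolLookupCallback signature declared in this header; the actual symbol-table lookup is left as a stub:

#include "llvm-c/Disassembler.h"
#include <cstdint>

// Sketch of a symbol-lookup callback recognizing the new ARM64 reference
// types; a real client would resolve ReferenceValue against its symbol table.
static const char *lookupSymbol(void *DisInfo, uint64_t ReferenceValue,
                                uint64_t *ReferenceType, uint64_t ReferencePC,
                                const char **ReferenceName) {
  (void)DisInfo; (void)ReferenceValue; (void)ReferencePC;
  if (*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADRP ||
      *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri) {
    // A page/pageoff pair: a comment naming the referenced symbol would be
    // attached here. Lookup omitted in this sketch.
    *ReferenceName = nullptr;
  }
  *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
  return nullptr;
}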
@@ -48,6 +48,7 @@ public:

    arm,        // ARM (little endian): arm, armv.*, xscale
    armeb,      // ARM (big endian): armeb
    arm64,      // ARM: arm64
    aarch64,    // AArch64 (little endian): aarch64
    aarch64_be, // AArch64 (big endian): aarch64_be
    hexagon,    // Hexagon: hexagon
@@ -529,6 +529,7 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
include "llvm/IR/IntrinsicsPowerPC.td"
include "llvm/IR/IntrinsicsX86.td"
include "llvm/IR/IntrinsicsARM.td"
include "llvm/IR/IntrinsicsARM64.td"
include "llvm/IR/IntrinsicsAArch64.td"
include "llvm/IR/IntrinsicsXCore.td"
include "llvm/IR/IntrinsicsHexagon.td"
include/llvm/IR/IntrinsicsARM64.td (new file, 621 lines)
@@ -0,0 +1,621 @@
//===- IntrinsicsARM64.td - Defines ARM64 intrinsics -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the ARM64-specific intrinsics.
//
//===----------------------------------------------------------------------===//

let TargetPrefix = "arm64" in {

def int_arm64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
def int_arm64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
def int_arm64_clrex : Intrinsic<[]>;

def int_arm64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
def int_arm64_stxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty,
                                llvm_ptr_ty]>;

def int_arm64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
                                LLVMMatchType<0>], [IntrNoMem]>;
def int_arm64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
                                LLVMMatchType<0>], [IntrNoMem]>;
}
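A front end lowers constructs such as __builtin_arm_ldrex to these intrinsics. A minimal IRBuilder sketch for the exclusive load above; the enclosing helper is illustrative, and Intrinsic::arm64_ldxr is the enumerator TableGen derives from int_arm64_ldxr:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Sketch: emit a call to llvm.arm64.ldxr for an exclusive load of *Addr.
// The intrinsic is overloaded on the pointer type (llvm_anyptr_ty above).
static llvm::Value *emitLoadExclusive(llvm::IRBuilder<> &Builder,
                                      llvm::Module &M, llvm::Value *Addr) {
  llvm::Function *Ldxr = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::arm64_ldxr, Addr->getType());
  return Builder.CreateCall(Ldxr, Addr);
}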
//===----------------------------------------------------------------------===//
|
||||
// Advanced SIMD (NEON)
|
||||
|
||||
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
|
||||
class AdvSIMD_2Scalar_Float_Intrinsic
|
||||
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
class AdvSIMD_FPToIntRounding_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||
|
||||
class AdvSIMD_1IntArg_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class AdvSIMD_1FloatArg_Intrinsic
|
||||
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class AdvSIMD_1VectorArg_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class AdvSIMD_1VectorArg_Expand_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
||||
class AdvSIMD_1VectorArg_Long_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
|
||||
class AdvSIMD_1IntArg_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
|
||||
class AdvSIMD_1VectorArg_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
|
||||
class AdvSIMD_1VectorArg_Int_Across_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
||||
class AdvSIMD_1VectorArg_Float_Across_Intrinsic
|
||||
: Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
||||
|
||||
class AdvSIMD_2IntArg_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2FloatArg_Intrinsic
|
||||
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Compare_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2Arg_FloatCompare_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Long_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMTruncatedType<0>,
|
||||
LLVMTruncatedType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Wide_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, LLVMTruncatedType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMExtendedType<0>, LLVMExtendedType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMExtendedType<0>, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMTruncatedType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMTruncatedType<0>, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
class AdvSIMD_3VectorArg_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_3VectorArg_Scalar_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty,
|
||||
LLVMMatchType<1>], [IntrNoMem]>;
|
||||
class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_CvtFxToFP_Intrinsic
|
||||
: Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_CvtFPToFx_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Arithmetic ops
|
||||
|
||||
let Properties = [IntrNoMem] in {
|
||||
// Vector Add Across Lanes
|
||||
def int_arm64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
def int_arm64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
def int_arm64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
|
||||
|
||||
// Vector Long Add Across Lanes
|
||||
def int_arm64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
def int_arm64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
|
||||
// Vector Halving Add
|
||||
def int_arm64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Vector Rounding Halving Add
|
||||
def int_arm64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Add
|
||||
def int_arm64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Add High-Half
|
||||
// FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
|
||||
// header is no longer supported.
|
||||
def int_arm64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
|
||||
|
||||
// Vector Rounding Add High-Half
|
||||
def int_arm64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
|
||||
|
||||
// Vector Saturating Doubling Multiply High
|
||||
def int_arm64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Rounding Doubling Multiply High
|
||||
def int_arm64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Polynomial Multiply
|
||||
def int_arm64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Vector Long Multiply
|
||||
def int_arm64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
|
||||
def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
|
||||
def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
|
||||
|
||||
// Vector Extending Multiply
|
||||
def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Doubling Long Multiply
|
||||
def int_arm64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic;
|
||||
def int_arm64_neon_sqdmulls_scalar
|
||||
: Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
// Vector Halving Subtract
|
||||
def int_arm64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Subtract
|
||||
def int_arm64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Subtract High-Half
|
||||
// FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
|
||||
// header is no longer supported.
|
||||
def int_arm64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
|
||||
|
||||
// Vector Rounding Subtract High-Half
|
||||
def int_arm64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
|
||||
|
||||
// Vector Compare Absolute Greater-than-or-equal
|
||||
def int_arm64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic;
|
||||
|
||||
// Vector Compare Absolute Greater-than
|
||||
def int_arm64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic;
|
||||
|
||||
// Vector Absolute Difference
|
||||
def int_arm64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Scalar Absolute Difference
|
||||
def int_arm64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic;
|
||||
|
||||
// Vector Max
|
||||
def int_arm64_neon_smax : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_umax : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Vector Max Across Lanes
|
||||
def int_arm64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
def int_arm64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
def int_arm64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
|
||||
def int_arm64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
|
||||
|
||||
// Vector Min
|
||||
def int_arm64_neon_smin : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_umin : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Vector Min/Max Number
|
||||
def int_arm64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic;
|
||||
def int_arm64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic;
|
||||
|
||||
// Vector Min Across Lanes
|
||||
def int_arm64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
def int_arm64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
|
||||
def int_arm64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
|
||||
def int_arm64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
|
||||
|
||||
// Pairwise Add
|
||||
def int_arm64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Long Pairwise Add
|
||||
// FIXME: In theory, we shouldn't need intrinsics for saddlp or
|
||||
// uaddlp, but tblgen's type inference currently can't handle the
|
||||
// pattern fragments this ends up generating.
|
||||
def int_arm64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
|
||||
def int_arm64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
|
||||
|
||||
// Folding Maximum
|
||||
def int_arm64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Folding Minimum
|
||||
def int_arm64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
def int_arm64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic;
|
||||
|
||||
// Reciprocal Estimate/Step
|
||||
def int_arm64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic;
|
||||
def int_arm64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Shift Left
|
||||
def int_arm64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Rounding Shift Left
|
||||
def int_arm64_neon_srshl : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_urshl : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Rounding Shift Left
|
||||
def int_arm64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Signed->Unsigned Shift Left by Constant
|
||||
def int_arm64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant
|
||||
def int_arm64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
|
||||
|
||||
// Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const
|
||||
def int_arm64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
|
||||
|
||||
// Vector Narrowing Shift Right by Constant
|
||||
def int_arm64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
|
||||
def int_arm64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
|
||||
|
||||
// Vector Rounding Narrowing Shift Right by Constant
|
||||
def int_arm64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
|
||||
|
||||
// Vector Rounding Narrowing Saturating Shift Right by Constant
|
||||
def int_arm64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
|
||||
def int_arm64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
|
||||
|
||||
// Vector Shift Left
|
||||
def int_arm64_neon_sshl : AdvSIMD_2IntArg_Intrinsic;
|
||||
def int_arm64_neon_ushl : AdvSIMD_2IntArg_Intrinsic;
|
||||
|
||||
// Vector Widening Shift Left by Constant
|
||||
def int_arm64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic;
|
||||
def int_arm64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
|
||||
def int_arm64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
|
||||
|
||||
// Vector Shift Right by Constant and Insert
|
||||
def int_arm64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic;
|
||||
|
||||
// Vector Shift Left by Constant and Insert
|
||||
def int_arm64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic;
|
||||
|
||||
// Vector Saturating Narrow
|
||||
def int_arm64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic;
|
||||
def int_arm64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic;
|
||||
def int_arm64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
|
||||
def int_arm64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
|
||||
|
||||
// Vector Saturating Extract and Unsigned Narrow
|
||||
def int_arm64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic;
|
||||
def int_arm64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic;
|
||||
|
||||
// Vector Absolute Value
|
||||
def int_arm64_neon_abs : AdvSIMD_1VectorArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Absolute Value
|
||||
def int_arm64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;
|
||||
|
||||
// Vector Saturating Negation
|
||||
def int_arm64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic;
|
||||
|
||||
// Vector Count Leading Sign Bits
|
||||
def int_arm64_neon_cls : AdvSIMD_1VectorArg_Intrinsic;
|
||||
|
||||
// Vector Reciprocal Estimate
|
||||
def int_arm64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic;
|
||||
def int_arm64_neon_frecpe : AdvSIMD_1VectorArg_Intrinsic;
|
||||
|
||||
// Vector Square Root Estimate
|
||||
def int_arm64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
|
||||
def int_arm64_neon_frsqrte : AdvSIMD_1VectorArg_Intrinsic;
|
||||
|
||||
// Vector Bitwise Reverse
|
||||
def int_arm64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic;
|
||||
|
||||
// Vector Conversions Between Half-Precision and Single-Precision.
|
||||
def int_arm64_neon_vcvtfp2hf
|
||||
: Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_arm64_neon_vcvthf2fp
|
||||
: Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
|
||||
|
||||
// Vector Conversions Between Floating-point and Fixed-point.
|
||||
def int_arm64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
|
||||
def int_arm64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;
|
||||
def int_arm64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic;
|
||||
def int_arm64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic;
|
||||
|
||||
// Vector FP->Int Conversions
|
||||
def int_arm64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
def int_arm64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
|
||||
|
||||
// Vector FP Rounding: only ties to even is unrepresented by a normal
|
||||
// intrinsic.
|
||||
def int_arm64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
|
||||
|
||||
// Scalar FP->Int conversions
|
||||
|
||||
// Vector FP Inexact Narrowing
|
||||
def int_arm64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic;
|
||||
|
||||
// Scalar FP Inexact Narrowing
|
||||
def int_arm64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
|
||||
class AdvSIMD_2Vector2Index_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector element to element moves
|
||||
def int_arm64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
|
||||
|
||||
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
|
||||
class AdvSIMD_1Vec_Load_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||
[IntrReadArgMem]>;
|
||||
class AdvSIMD_1Vec_Store_Lane_Intrinsic
|
||||
: Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<2>]>;
|
||||
|
||||
class AdvSIMD_2Vec_Load_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||
[IntrReadArgMem]>;
|
||||
class AdvSIMD_2Vec_Load_Lane_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
llvm_i64_ty, llvm_anyptr_ty],
|
||||
[IntrReadArgMem]>;
|
||||
class AdvSIMD_2Vec_Store_Intrinsic
|
||||
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
|
||||
LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||
[IntrReadWriteArgMem, NoCapture<2>]>;
|
||||
class AdvSIMD_2Vec_Store_Lane_Intrinsic
|
||||
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
|
||||
llvm_i64_ty, llvm_anyptr_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<3>]>;
|
||||
|
||||
class AdvSIMD_3Vec_Load_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||
[IntrReadArgMem]>;
|
||||
class AdvSIMD_3Vec_Load_Lane_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
llvm_i64_ty, llvm_anyptr_ty],
|
||||
[IntrReadArgMem]>;
|
||||
class AdvSIMD_3Vec_Store_Intrinsic
|
||||
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||
[IntrReadWriteArgMem, NoCapture<3>]>;
|
||||
class AdvSIMD_3Vec_Store_Lane_Intrinsic
|
||||
: Intrinsic<[], [llvm_anyvector_ty,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
llvm_i64_ty, llvm_anyptr_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<4>]>;
|
||||
|
||||
class AdvSIMD_4Vec_Load_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||
[IntrReadArgMem]>;
|
||||
class AdvSIMD_4Vec_Load_Lane_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
llvm_i64_ty, llvm_anyptr_ty],
|
||||
[IntrReadArgMem]>;
|
||||
class AdvSIMD_4Vec_Store_Intrinsic
|
||||
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||
[IntrReadWriteArgMem, NoCapture<4>]>;
|
||||
class AdvSIMD_4Vec_Store_Lane_Intrinsic
|
||||
: Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>, LLVMMatchType<0>,
|
||||
llvm_i64_ty, llvm_anyptr_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<5>]>;
|
||||
}
|
||||
|
||||
// Memory ops
|
||||
|
||||
def int_arm64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic;
|
||||
def int_arm64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic;
|
||||
def int_arm64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic;
|
||||
|
||||
def int_arm64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic;
|
||||
def int_arm64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic;
|
||||
def int_arm64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic;
|
||||
|
||||
def int_arm64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic;
|
||||
def int_arm64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic;
|
||||
def int_arm64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic;
|
||||
|
||||
def int_arm64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic;
|
||||
def int_arm64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic;
|
||||
def int_arm64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic;
|
||||
|
||||
def int_arm64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic;
|
||||
def int_arm64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic;
|
||||
def int_arm64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic;
|
||||
|
||||
def int_arm64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic;
|
||||
def int_arm64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic;
|
||||
def int_arm64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic;
|
||||
|
||||
def int_arm64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic;
|
||||
def int_arm64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic;
|
||||
def int_arm64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic;
|
||||
|
||||
let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
|
||||
class AdvSIMD_Tbl1_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_Tbl2_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
class AdvSIMD_Tbl3_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_Tbl4_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
class AdvSIMD_Tbx1_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_Tbx2_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_Tbx3_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_Tbx4_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
def int_arm64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic;
|
||||
def int_arm64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic;
|
||||
def int_arm64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic;
|
||||
def int_arm64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic;
|
||||
|
||||
def int_arm64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic;
|
||||
def int_arm64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
|
||||
def int_arm64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
|
||||
def int_arm64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
|
||||
|
||||
let TargetPrefix = "arm64" in {
|
||||
class Crypto_AES_DataKey_Intrinsic
|
||||
: Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
|
||||
|
||||
class Crypto_AES_Data_Intrinsic
|
||||
: Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
|
||||
|
||||
// SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
|
||||
// (v4i32).
|
||||
class Crypto_SHA_5Hash4Schedule_Intrinsic
|
||||
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
|
||||
// (v4i32).
|
||||
class Crypto_SHA_1Hash_Intrinsic
|
||||
: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
// SHA intrinsic taking 8 words of the schedule
|
||||
class Crypto_SHA_8Schedule_Intrinsic
|
||||
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
|
||||
|
||||
// SHA intrinsic taking 12 words of the schedule
|
||||
class Crypto_SHA_12Schedule_Intrinsic
|
||||
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// SHA intrinsic taking 8 words of the hash and 4 of the schedule.
|
||||
class Crypto_SHA_8Hash4Schedule_Intrinsic
|
||||
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// AES
|
||||
def int_arm64_crypto_aese : Crypto_AES_DataKey_Intrinsic;
|
||||
def int_arm64_crypto_aesd : Crypto_AES_DataKey_Intrinsic;
|
||||
def int_arm64_crypto_aesmc : Crypto_AES_Data_Intrinsic;
|
||||
def int_arm64_crypto_aesimc : Crypto_AES_Data_Intrinsic;
|
||||
|
||||
// SHA1
|
||||
def int_arm64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic;
|
||||
def int_arm64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic;
|
||||
def int_arm64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic;
|
||||
def int_arm64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic;
|
||||
|
||||
def int_arm64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic;
|
||||
def int_arm64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic;
|
||||
|
||||
// SHA256
|
||||
def int_arm64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic;
|
||||
def int_arm64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
|
||||
def int_arm64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
|
||||
def int_arm64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CRC32
|
||||
|
||||
let TargetPrefix = "arm64" in {
|
||||
|
||||
def int_arm64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_arm64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_arm64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_arm64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_arm64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_arm64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_arm64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_arm64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
@@ -158,7 +158,13 @@ public:
    VK_TLSLDM,
    VK_TPOFF,
    VK_DTPOFF,
    VK_TLVP,      // Mach-O thread local variable relocation
    VK_TLVP,      // Mach-O thread local variable relocations
    VK_TLVPPAGE,
    VK_TLVPPAGEOFF,
    VK_PAGE,
    VK_PAGEOFF,
    VK_GOTPAGE,
    VK_GOTPAGEOFF,
    VK_SECREL,
    VK_WEAKREF,   // The link between the symbols in .weakref foo, bar
@ -408,6 +408,34 @@ namespace llvm {
|
||||
ARM_RELOC_HALF = 8,
|
||||
ARM_RELOC_HALF_SECTDIFF = 9,
|
||||
|
||||
// Constant values for the r_type field in an ARM64 architecture
|
||||
// llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
|
||||
// structure.
|
||||
|
||||
// For pointers.
|
||||
ARM64_RELOC_UNSIGNED = 0,
|
||||
// Must be followed by an ARM64_RELOC_UNSIGNED
|
||||
ARM64_RELOC_SUBTRACTOR = 1,
|
||||
// A B/BL instruction with 26-bit displacement.
|
||||
ARM64_RELOC_BRANCH26 = 2,
|
||||
// PC-rel distance to page of target.
|
||||
ARM64_RELOC_PAGE21 = 3,
|
||||
// Offset within page, scaled by r_length.
|
||||
ARM64_RELOC_PAGEOFF12 = 4,
|
||||
// PC-rel distance to page of GOT slot.
|
||||
ARM64_RELOC_GOT_LOAD_PAGE21 = 5,
|
||||
// Offset within page of GOT slot, scaled by r_length.
|
||||
ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6,
|
||||
// For pointers to GOT slots.
|
||||
ARM64_RELOC_POINTER_TO_GOT = 7,
|
||||
// PC-rel distance to page of TLVP slot.
|
||||
ARM64_RELOC_TLVP_LOAD_PAGE21 = 8,
|
||||
// Offset within page of TLVP slot, scaled by r_length.
|
||||
ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9,
|
||||
// Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12.
|
||||
ARM64_RELOC_ADDEND = 10,
|
||||
|
||||
|
||||
// Constant values for the r_type field in an x86_64 architecture
|
||||
// llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
|
||||
// structure
|
||||
@ -914,6 +942,7 @@ namespace llvm {
|
||||
/* CPU_TYPE_MIPS = 8, */
|
||||
CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC
|
||||
CPU_TYPE_ARM = 12,
|
||||
CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64,
|
||||
CPU_TYPE_SPARC = 14,
|
||||
CPU_TYPE_POWERPC = 18,
|
||||
CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
|
||||
@ -987,6 +1016,10 @@ namespace llvm {
|
||||
CPU_SUBTYPE_ARM_V7EM = 16
|
||||
};
|
||||
|
||||
enum CPUSubTypeARM64 {
|
||||
CPU_SUBTYPE_ARM64_ALL = 0
|
||||
};
|
||||
|
||||
enum CPUSubTypeSPARC {
|
||||
CPU_SUBTYPE_SPARC_ALL = 0
|
||||
};
|
||||
|
@ -362,7 +362,6 @@ def bitconvert : SDNode<"ISD::BITCAST" , SDTUnaryOp>;
|
||||
def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
|
||||
def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;
|
||||
|
||||
|
||||
def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
|
||||
def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
|
||||
def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
|
||||
@ -466,7 +465,7 @@ def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
|
||||
def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
|
||||
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
|
||||
def concat_vectors : SDNode<"ISD::CONCAT_VECTORS",
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>,[]>;
|
||||
SDTypeProfile<1, 2, [SDTCisSubVecOfVec<1, 0>, SDTCisSameAs<1, 2>]>,[]>;
|
||||
|
||||
// This operator does not do subvector type checking. The ARM
|
||||
// backend, at least, needs it.
|
||||
|
@ -167,6 +167,10 @@ void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section,
|
||||
resolveARMRelocation(LocalAddress, FinalAddress, (uintptr_t)Value, isPCRel,
|
||||
MachoType, Size, Addend);
|
||||
break;
|
||||
case Triple::arm64:
|
||||
resolveARM64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value,
|
||||
isPCRel, MachoType, Size, Addend);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -293,6 +297,55 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress,
|
||||
uint64_t FinalAddress,
|
||||
uint64_t Value, bool isPCRel,
|
||||
unsigned Type, unsigned Size,
|
||||
int64_t Addend) {
|
||||
// If the relocation is PC-relative, the value to be encoded is the
|
||||
// pointer difference.
|
||||
if (isPCRel)
|
||||
Value -= FinalAddress;
|
||||
|
||||
switch (Type) {
|
||||
default:
|
||||
llvm_unreachable("Invalid relocation type!");
|
||||
case MachO::ARM64_RELOC_UNSIGNED: {
|
||||
// Mask in the target value a byte at a time (we don't have an alignment
|
||||
// guarantee for the target address, so this is safest).
|
||||
uint8_t *p = (uint8_t *)LocalAddress;
|
||||
for (unsigned i = 0; i < Size; ++i) {
|
||||
*p++ = (uint8_t)Value;
|
||||
Value >>= 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MachO::ARM64_RELOC_BRANCH26: {
|
||||
// Mask the value into the target address. We know instructions are
|
||||
// 32-bit aligned, so we can do it all at once.
|
||||
uint32_t *p = (uint32_t *)LocalAddress;
|
||||
// The low two bits of the value are not encoded.
|
||||
Value >>= 2;
|
||||
// Mask the value to 26 bits.
|
||||
Value &= 0x3ffffff;
|
||||
// Insert the value into the instruction.
|
||||
*p = (*p & ~0x3ffffff) | Value;
|
||||
break;
|
||||
}
|
||||
case MachO::ARM64_RELOC_SUBTRACTOR:
|
||||
case MachO::ARM64_RELOC_PAGE21:
|
||||
case MachO::ARM64_RELOC_PAGEOFF12:
|
||||
case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
|
||||
case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
|
||||
case MachO::ARM64_RELOC_POINTER_TO_GOT:
|
||||
case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
|
||||
case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
|
||||
case MachO::ARM64_RELOC_ADDEND:
|
||||
return Error("Relocation type not implemented yet!");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
relocation_iterator RuntimeDyldMachO::processRelocationRef(
|
||||
unsigned SectionID, relocation_iterator RelI, ObjectImage &Obj,
|
||||
ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols,
|
||||
|
@ -34,6 +34,9 @@ class RuntimeDyldMachO : public RuntimeDyldImpl {
|
||||
bool resolveARMRelocation(uint8_t *LocalAddress, uint64_t FinalAddress,
|
||||
uint64_t Value, bool isPCRel, unsigned Type,
|
||||
unsigned Size, int64_t Addend);
|
||||
bool resolveARM64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress,
|
||||
uint64_t Value, bool IsPCRel, unsigned Type,
|
||||
unsigned Size, int64_t Addend);
|
||||
|
||||
void resolveRelocation(const SectionEntry &Section, uint64_t Offset,
|
||||
uint64_t Value, uint32_t Type, int64_t Addend,
|
||||
|
@ -321,6 +321,8 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
|
||||
MCpu = "core2";
|
||||
else if (Triple.getArch() == llvm::Triple::x86)
|
||||
MCpu = "yonah";
|
||||
else if (Triple.getArch() == llvm::Triple::arm64)
|
||||
MCpu = "cyclone";
|
||||
}
|
||||
|
||||
TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options,
|
||||
|
@ -168,6 +168,8 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
|
||||
CPU = "core2";
|
||||
else if (Triple.getArch() == llvm::Triple::x86)
|
||||
CPU = "yonah";
|
||||
else if (Triple.getArch() == llvm::Triple::arm64)
|
||||
CPU = "cyclone";
|
||||
}
|
||||
|
||||
TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
|
||||
|
@ -179,6 +179,12 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
|
||||
case VK_TPOFF: return "TPOFF";
|
||||
case VK_DTPOFF: return "DTPOFF";
|
||||
case VK_TLVP: return "TLVP";
|
||||
case VK_TLVPPAGE: return "TLVPPAGE";
|
||||
case VK_TLVPPAGEOFF: return "TLVPPAGEOFF";
|
||||
case VK_PAGE: return "PAGE";
|
||||
case VK_PAGEOFF: return "PAGEOFF";
|
||||
case VK_GOTPAGE: return "GOTPAGE";
|
||||
case VK_GOTPAGEOFF: return "GOTPAGEOFF";
|
||||
case VK_SECREL: return "SECREL32";
|
||||
case VK_WEAKREF: return "WEAKREF";
|
||||
case VK_ARM_NONE: return "none";
|
||||
@ -300,6 +306,18 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
|
||||
.Case("dtpoff", VK_DTPOFF)
|
||||
.Case("TLVP", VK_TLVP)
|
||||
.Case("tlvp", VK_TLVP)
|
||||
.Case("TLVPPAGE", VK_TLVPPAGE)
|
||||
.Case("tlvppage", VK_TLVPPAGE)
|
||||
.Case("TLVPPAGEOFF", VK_TLVPPAGEOFF)
|
||||
.Case("tlvppageoff", VK_TLVPPAGEOFF)
|
||||
.Case("PAGE", VK_PAGE)
|
||||
.Case("page", VK_PAGE)
|
||||
.Case("PAGEOFF", VK_PAGEOFF)
|
||||
.Case("pageoff", VK_PAGEOFF)
|
||||
.Case("GOTPAGE", VK_GOTPAGE)
|
||||
.Case("gotpage", VK_GOTPAGE)
|
||||
.Case("GOTPAGEOFF", VK_GOTPAGEOFF)
|
||||
.Case("gotpageoff", VK_GOTPAGEOFF)
|
||||
.Case("IMGREL", VK_COFF_IMGREL32)
|
||||
.Case("imgrel", VK_COFF_IMGREL32)
|
||||
.Case("SECREL32", VK_SECREL)
|
||||
|
@ -22,6 +22,9 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
|
||||
IsFunctionEHFrameSymbolPrivate = false;
|
||||
SupportsWeakOmittedEHFrame = false;
|
||||
|
||||
if (T.isOSDarwin() && T.getArch() == Triple::arm64)
|
||||
SupportsCompactUnwindWithoutEHFrame = true;
|
||||
|
||||
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
|
||||
| dwarf::DW_EH_PE_sdata4;
|
||||
LSDAEncoding = FDEEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
|
||||
@ -146,7 +149,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
|
||||
|
||||
COFFDebugSymbolsSection = 0;
|
||||
|
||||
if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) {
|
||||
if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) ||
|
||||
(T.isOSDarwin() && T.getArch() == Triple::arm64)) {
|
||||
CompactUnwindSection =
|
||||
Ctx->getMachOSection("__LD", "__compact_unwind",
|
||||
MachO::S_ATTR_DEBUG,
|
||||
@ -154,6 +158,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
|
||||
|
||||
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
|
||||
CompactUnwindDwarfEHFrameOnly = 0x04000000;
|
||||
else if (T.getArch() == Triple::arm64)
|
||||
CompactUnwindDwarfEHFrameOnly = 0x03000000;
|
||||
}
|
||||
|
||||
// Debug Information.
|
||||
@ -763,6 +769,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
|
||||
// cellspu-apple-darwin. Perhaps we should fix in Triple?
|
||||
if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
|
||||
Arch == Triple::arm || Arch == Triple::thumb ||
|
||||
Arch == Triple::arm64 ||
|
||||
Arch == Triple::ppc || Arch == Triple::ppc64 ||
|
||||
Arch == Triple::UnknownArch) &&
|
||||
(T.isOSDarwin() || T.isOSBinFormatMachO())) {
|
||||
|
@ -13,6 +13,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Object/MachO.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Support/DataExtractor.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
@ -934,6 +935,23 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
|
||||
res = Table[RType];
|
||||
break;
|
||||
}
|
||||
case Triple::arm64:
|
||||
case Triple::aarch64: {
|
||||
static const char *const Table[] = {
|
||||
"ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR",
|
||||
"ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21",
|
||||
"ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGE21",
|
||||
"ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_POINTER_TO_GOT",
|
||||
"ARM64_RELOC_TLVP_LOAD_PAGE21", "ARM64_RELOC_TLVP_LOAD_PAGEOFF12",
|
||||
"ARM64_RELOC_ADDEND"
|
||||
};
|
||||
|
||||
if (RType >= array_lengthof(Table))
|
||||
res = "Unknown";
|
||||
else
|
||||
res = Table[RType];
|
||||
break;
|
||||
}
|
||||
case Triple::ppc: {
|
||||
static const char *const Table[] = {
|
||||
"PPC_RELOC_VANILLA",
|
||||
@ -1256,6 +1274,8 @@ StringRef MachOObjectFile::getFileFormatName() const {
|
||||
switch (CPUType) {
|
||||
case llvm::MachO::CPU_TYPE_X86_64:
|
||||
return "Mach-O 64-bit x86-64";
|
||||
case llvm::MachO::CPU_TYPE_ARM64:
|
||||
return "Mach-O arm64";
|
||||
case llvm::MachO::CPU_TYPE_POWERPC64:
|
||||
return "Mach-O 64-bit ppc64";
|
||||
default:
|
||||
@ -1271,6 +1291,8 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
|
||||
return Triple::x86_64;
|
||||
case llvm::MachO::CPU_TYPE_ARM:
|
||||
return Triple::arm;
|
||||
case llvm::MachO::CPU_TYPE_ARM64:
|
||||
return Triple::arm64;
|
||||
case llvm::MachO::CPU_TYPE_POWERPC:
|
||||
return Triple::ppc;
|
||||
case llvm::MachO::CPU_TYPE_POWERPC64:
|
||||
|
@ -23,6 +23,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
|
||||
case aarch64_be: return "aarch64_be";
|
||||
case arm: return "arm";
|
||||
case armeb: return "armeb";
|
||||
case arm64: return "arm64";
|
||||
case hexagon: return "hexagon";
|
||||
case mips: return "mips";
|
||||
case mipsel: return "mipsel";
|
||||
@ -66,6 +67,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
|
||||
case thumb:
|
||||
case thumbeb: return "arm";
|
||||
|
||||
case arm64: return "arm64";
|
||||
|
||||
case ppc64:
|
||||
case ppc64le:
|
||||
case ppc: return "ppc";
|
||||
@ -91,6 +94,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
|
||||
|
||||
case nvptx: return "nvptx";
|
||||
case nvptx64: return "nvptx";
|
||||
|
||||
case le32: return "le32";
|
||||
case amdil: return "amdil";
|
||||
case spir: return "spir";
|
||||
@ -173,6 +177,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
|
||||
.Case("aarch64_be", aarch64_be)
|
||||
.Case("arm", arm)
|
||||
.Case("armeb", armeb)
|
||||
.Case("arm64", arm64)
|
||||
.Case("mips", mips)
|
||||
.Case("mipsel", mipsel)
|
||||
.Case("mips64", mips64)
|
||||
@ -219,6 +224,7 @@ const char *Triple::getArchNameForAssembler() {
|
||||
.Cases("armv6", "thumbv6", "armv6")
|
||||
.Cases("armv7", "thumbv7", "armv7")
|
||||
.Case("armeb", "armeb")
|
||||
.Case("arm64", "arm64")
|
||||
.Case("r600", "r600")
|
||||
.Case("nvptx", "nvptx")
|
||||
.Case("nvptx64", "nvptx64")
|
||||
@ -250,6 +256,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
|
||||
.StartsWith("thumbv", Triple::thumb)
|
||||
.Case("thumbeb", Triple::thumbeb)
|
||||
.StartsWith("thumbebv", Triple::thumbeb)
|
||||
.Case("arm64", Triple::arm64)
|
||||
.Case("msp430", Triple::msp430)
|
||||
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
|
||||
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
|
||||
@ -681,9 +688,9 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
|
||||
break;
|
||||
case IOS:
|
||||
getOSVersion(Major, Minor, Micro);
|
||||
// Default to 5.0.
|
||||
// Default to 5.0 (or 7.0 for arm64).
|
||||
if (Major == 0)
|
||||
Major = 5;
|
||||
Major = (getArch() == arm64) ? 7 : 5;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -771,6 +778,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
|
||||
case llvm::Triple::spir:
|
||||
return 32;
|
||||
|
||||
case llvm::Triple::arm64:
|
||||
case llvm::Triple::aarch64:
|
||||
case llvm::Triple::aarch64_be:
|
||||
case llvm::Triple::mips64:
|
||||
@ -838,6 +846,7 @@ Triple Triple::get32BitArchVariant() const {
|
||||
case Triple::sparcv9: T.setArch(Triple::sparc); break;
|
||||
case Triple::x86_64: T.setArch(Triple::x86); break;
|
||||
case Triple::spir64: T.setArch(Triple::spir); break;
|
||||
case Triple::arm64: T.setArch(Triple::arm); break;
|
||||
}
|
||||
return T;
|
||||
}
|
||||
@ -847,7 +856,6 @@ Triple Triple::get64BitArchVariant() const {
|
||||
switch (getArch()) {
|
||||
case Triple::UnknownArch:
|
||||
case Triple::amdil:
|
||||
case Triple::arm:
|
||||
case Triple::armeb:
|
||||
case Triple::hexagon:
|
||||
case Triple::le32:
|
||||
@ -871,6 +879,7 @@ Triple Triple::get64BitArchVariant() const {
|
||||
case Triple::sparcv9:
|
||||
case Triple::systemz:
|
||||
case Triple::x86_64:
|
||||
case Triple::arm64:
|
||||
// Already 64-bit.
|
||||
break;
|
||||
|
||||
@ -881,6 +890,7 @@ Triple Triple::get64BitArchVariant() const {
|
||||
case Triple::sparc: T.setArch(Triple::sparcv9); break;
|
||||
case Triple::x86: T.setArch(Triple::x86_64); break;
|
||||
case Triple::spir: T.setArch(Triple::spir64); break;
|
||||
case Triple::arm: T.setArch(Triple::arm64); break;
|
||||
}
|
||||
return T;
|
||||
}
|
||||
|
@ -205,7 +205,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
|
||||
void* start = NearBlock ? (unsigned char*)NearBlock->base() +
|
||||
NearBlock->size() : 0;
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
|
||||
void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC,
|
||||
flags, fd, 0);
|
||||
#else
|
||||
@ -220,7 +220,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
|
||||
return MemoryBlock();
|
||||
}
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
|
||||
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
|
||||
(vm_size_t)(PageSize*NumPages), 0,
|
||||
VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
|
||||
@ -253,7 +253,7 @@ bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
|
||||
}
|
||||
|
||||
bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
|
||||
if (M.Address == 0 || M.Size == 0) return false;
|
||||
Memory::InvalidateInstructionCache(M.Address, M.Size);
|
||||
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
|
||||
@ -265,7 +265,7 @@ bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
|
||||
}
|
||||
|
||||
bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
|
||||
if (M.Address == 0 || M.Size == 0) return false;
|
||||
Memory::InvalidateInstructionCache(M.Address, M.Size);
|
||||
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
|
||||
@ -280,7 +280,7 @@ bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
|
||||
}
|
||||
|
||||
bool Memory::setRangeWritable(const void *Addr, size_t Size) {
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
|
||||
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
|
||||
(vm_size_t)Size, 0,
|
||||
VM_PROT_READ | VM_PROT_WRITE);
|
||||
@ -291,7 +291,7 @@ bool Memory::setRangeWritable(const void *Addr, size_t Size) {
|
||||
}
|
||||
|
||||
bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
|
||||
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
|
||||
(vm_size_t)Size, 0,
|
||||
VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
|
||||
@ -311,7 +311,8 @@ void Memory::InvalidateInstructionCache(const void *Addr,
|
||||
#if defined(__APPLE__)
|
||||
|
||||
# if (defined(__POWERPC__) || defined (__ppc__) || \
|
||||
defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
|
||||
defined(_POWER) || defined(_ARCH_PPC) || defined(__arm__) || \
|
||||
defined(__arm64__))
|
||||
sys_icache_invalidate(const_cast<void *>(Addr), Len);
|
||||
# endif
|
||||
|
||||
|
lib/Target/ARM64/ARM64.h (new file, 48 lines)
@ -0,0 +1,48 @@
//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the LLVM
// ARM64 back-end.
//
//===----------------------------------------------------------------------===//

#ifndef TARGET_ARM64_H
#define TARGET_ARM64_H

#include "MCTargetDesc/ARM64BaseInfo.h"
#include "MCTargetDesc/ARM64MCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/DataTypes.h"

namespace llvm {

class ARM64TargetMachine;
class FunctionPass;
class MachineFunctionPass;

FunctionPass *createARM64DeadRegisterDefinitions();
FunctionPass *createARM64ConditionalCompares();
FunctionPass *createARM64AdvSIMDScalar();
FunctionPass *createARM64BranchRelaxation();
FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createARM64StorePairSuppressPass();
FunctionPass *createARM64ExpandPseudoPass();
FunctionPass *createARM64LoadStoreOptimizationPass();
ModulePass *createARM64PromoteConstantPass();
FunctionPass *createARM64AddressTypePromotionPass();
/// \brief Creates an ARM64-specific Target Transformation Info pass.
ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM);

FunctionPass *createARM64CleanupLocalDynamicTLSPass();

FunctionPass *createARM64CollectLOHPass();
} // end namespace llvm

#endif
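The factory functions declared above are the hooks a target's pass pipeline calls when it sets up code generation. As a rough, hypothetical sketch of how they would typically be wired into a TargetPassConfig subclass (the ARM64PassConfig class name and the exact hook choices are assumptions for illustration; the real wiring is not shown in this excerpt):

#include "ARM64.h"
#include "llvm/CodeGen/Passes.h"
using namespace llvm;

namespace {
// Hypothetical sketch only: register ARM64 passes in the codegen pipeline.
class ARM64PassConfig : public TargetPassConfig {
public:
  ARM64PassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  virtual bool addInstSelector() {
    // SelectionDAG instruction selection for the arm64 triple.
    addPass(createARM64ISelDag(static_cast<ARM64TargetMachine &>(*TM),
                               getOptLevel()));
    return false;
  }

  virtual bool addILPOpts() {
    // Optional machine-level optimizations provided by this backend.
    addPass(createARM64ConditionalCompares());
    addPass(createARM64AdvSIMDScalar());
    addPass(createARM64StorePairSuppressPass());
    return true;
  }
};
} // end anonymous namespace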
95	lib/Target/ARM64/ARM64.td	Normal file
@ -0,0 +1,95 @@
//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Target-independent interfaces which we are implementing
//===----------------------------------------------------------------------===//

include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// ARM64 Subtarget features.
//

/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;

/// Cyclone has instructions which zero registers for "free".
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "ARM64RegisterInfo.td"
include "ARM64CallingConvention.td"

//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//

include "ARM64Schedule.td"
include "ARM64InstrInfo.td"

def ARM64InstrInfo : InstrInfo;

//===----------------------------------------------------------------------===//
// ARM64 Processors supported.
//
include "ARM64SchedCyclone.td"

def : ProcessorModel<"arm64-generic", NoSchedModel, []>;

def : ProcessorModel<"cyclone", CycloneModel, [FeatureZCRegMove, FeatureZCZeroing]>;

//===----------------------------------------------------------------------===//
// Assembly parser
//===----------------------------------------------------------------------===//

def GenericAsmParserVariant : AsmParserVariant {
int Variant = 0;
string Name = "generic";
}

def AppleAsmParserVariant : AsmParserVariant {
int Variant = 1;
string Name = "apple-neon";
}

//===----------------------------------------------------------------------===//
// Assembly printer
//===----------------------------------------------------------------------===//
// ARM64 uses the MC printer for asm output, so make sure the TableGen
// AsmWriter bits get associated with the correct class.
def GenericAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
int Variant = 0;
bit isMCAsmWriter = 1;
}

def AppleAsmWriter : AsmWriter {
let AsmWriterClassName = "AppleInstPrinter";
int Variant = 1;
int isMCAsmWriter = 1;
}

//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//

def ARM64 : Target {
let InstructionSet = ARM64InstrInfo;
let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant];
let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
}
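Each SubtargetFeature above names the C++ field it sets (HasZeroCycleRegMove, HasZeroCycleZeroing); the cyclone ProcessorModel enables both, while arm64-generic enables neither. A minimal sketch of how the backend's C++ side would typically consult such a feature (the ARM64Subtarget accessor name is an assumption based on the field name and is not shown in this excerpt):

// Sketch only: guard a Cyclone-specific idiom behind the "zcz" feature bit.
static bool shouldUseZeroCycleZeroing(const ARM64Subtarget &ST) {
  return ST.hasZeroCycleZeroing();
}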
505	lib/Target/ARM64/ARM64AddressTypePromotion.cpp	Normal file
@ -0,0 +1,505 @@
|
||||
|
||||
//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass tries to promote the computations used to obtain a sign-extended
// value used in memory accesses.
|
||||
// E.g.
|
||||
// a = add nsw i32 b, 3
|
||||
// d = sext i32 a to i64
|
||||
// e = getelementptr ..., i64 d
|
||||
//
|
||||
// =>
|
||||
// f = sext i32 b to i64
|
||||
// a = add nsw i64 f, 3
|
||||
// e = getelementptr ..., i64 a
|
||||
//
|
||||
// This is legal to do if the computations are marked with either the nsw or
// nuw flag.
|
||||
// Moreover, the current heuristic is simple: it does not create new sext
|
||||
// operations, i.e., it gives up when a sext would have forked (e.g., if
|
||||
// a = add i32 b, c, two sexts are required to promote the computation).
|
||||
//
|
||||
// FIXME: This pass may be useful for other targets too.
|
||||
// ===---------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-type-promotion"
|
||||
#include "ARM64.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden,
|
||||
cl::desc("Enable the type promotion pass"),
|
||||
cl::init(true));
|
||||
static cl::opt<bool>
|
||||
EnableMerge("arm64-type-promotion-merge", cl::Hidden,
|
||||
cl::desc("Enable merging of redundant sexts when one is dominating"
|
||||
" the other."),
|
||||
cl::init(true));
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM64AddressTypePromotion
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace llvm {
|
||||
void initializeARM64AddressTypePromotionPass(PassRegistry &);
|
||||
}
|
||||
|
||||
namespace {
|
||||
class ARM64AddressTypePromotion : public FunctionPass {
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
ARM64AddressTypePromotion()
|
||||
: FunctionPass(ID), Func(NULL), ConsideredSExtType(NULL) {
|
||||
initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM64 Address Type Promotion";
|
||||
}
|
||||
|
||||
/// Iterate over the functions and promote the computation of interesting
|
||||
// sext instructions.
|
||||
bool runOnFunction(Function &F);
|
||||
|
||||
private:
|
||||
/// The current function.
|
||||
Function *Func;
|
||||
/// Filter out all sexts that do not have this type.
|
||||
/// Currently initialized with Int64Ty.
|
||||
Type *ConsideredSExtType;
|
||||
|
||||
// This transformation requires dominator info.
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
FunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
typedef SmallPtrSet<Instruction *, 32> SetOfInstructions;
|
||||
typedef SmallVector<Instruction *, 16> Instructions;
|
||||
typedef DenseMap<Value *, Instructions> ValueToInsts;
|
||||
|
||||
/// Check if it is profitable to move a sext through this instruction.
|
||||
/// Currently, we consider it is profitable if:
|
||||
/// - Inst is used only once (no need to insert truncate).
|
||||
/// - Inst has only one operand that will require a sext operation (we
/// do not create new sext operations).
|
||||
bool shouldGetThrough(const Instruction *Inst);
|
||||
|
||||
/// Check if it is possible and legal to move a sext through this
|
||||
/// instruction.
|
||||
/// Current heuristic considers that we can get through:
|
||||
/// - Arithmetic operation marked with the nsw or nuw flag.
|
||||
/// - Other sext operation.
|
||||
/// - Truncate operation if it was just dropping sign extended bits.
|
||||
bool canGetThrough(const Instruction *Inst);
|
||||
|
||||
/// Move sext operations through safe to sext instructions.
|
||||
bool propagateSignExtension(Instructions &SExtInsts);
|
||||
|
||||
/// Check whether this sext should be considered for code motion.
|
||||
/// We look for sext with ConsideredSExtType and uses in at least one
|
||||
// GetElementPtrInst.
|
||||
bool shouldConsiderSExt(const Instruction *SExt) const;
|
||||
|
||||
/// Collect all interesting sext operations, i.e., the ones with the right
|
||||
/// type and used in memory accesses.
|
||||
/// More precisely, a sext instruction is considered as interesting if it
|
||||
/// is used in a "complex" getelementptr or it exits at least another
|
||||
/// sext instruction that sign extended the same initial value.
|
||||
/// A getelementptr is considered as "complex" if it has more than 2
|
||||
// operands.
|
||||
void analyzeSExtension(Instructions &SExtInsts);
|
||||
|
||||
/// Merge redundant sign extension operations in common dominator.
|
||||
void mergeSExts(ValueToInsts &ValToSExtendedUses,
|
||||
SetOfInstructions &ToRemove);
|
||||
};
|
||||
} // end anonymous namespace.
|
||||
|
||||
char ARM64AddressTypePromotion::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion",
|
||||
"ARM64 Type Promotion Pass", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion",
|
||||
"ARM64 Type Promotion Pass", false, false)
|
||||
|
||||
FunctionPass *llvm::createARM64AddressTypePromotionPass() {
|
||||
return new ARM64AddressTypePromotion();
|
||||
}
|
||||
|
||||
bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
|
||||
if (isa<SExtInst>(Inst))
|
||||
return true;
|
||||
|
||||
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
|
||||
if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
|
||||
(BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
|
||||
return true;
|
||||
|
||||
// sext(trunc(sext)) --> sext
|
||||
if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) {
|
||||
const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0));
|
||||
// Check that the truncate just drops sign-extended bits.
|
||||
if (Inst->getType()->getIntegerBitWidth() >=
|
||||
Opnd->getOperand(0)->getType()->getIntegerBitWidth() &&
|
||||
Inst->getOperand(0)->getType()->getIntegerBitWidth() <=
|
||||
ConsideredSExtType->getIntegerBitWidth())
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
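The trunc case above is the subtle one: a truncate of a sign-extended value is transparent only when the truncated type still holds every bit of the original, pre-extension value. Restated as a small self-contained predicate over bit widths (an illustrative paraphrase of the condition above, not LLVM API):

// For trunc (sext i16 %y to i64) to i32: DstBits=32 >= OrigBits=16, and the
// trunc source (64 bits) is no wider than the considered sext type (i64),
// so the truncate only drops sign bits and can be looked through.
static bool truncOnlyDropsSignBits(unsigned TruncDstBits, unsigned OrigBits,
                                   unsigned TruncSrcBits,
                                   unsigned ConsideredBits) {
  return TruncDstBits >= OrigBits && TruncSrcBits <= ConsideredBits;
}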
|
||||
|
||||
bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
|
||||
// If the type of the sext is the same as the considered one, this sext
|
||||
// will become useless.
|
||||
// Otherwise, we will have to do something to preserve the original value,
|
||||
// unless it is used once.
|
||||
if (isa<SExtInst>(Inst) &&
|
||||
(Inst->getType() == ConsideredSExtType || Inst->hasOneUse()))
|
||||
return true;
|
||||
|
||||
// If the Inst is used more than once, we may need to insert truncate
|
||||
// operations and we don't do that at the moment.
|
||||
if (!Inst->hasOneUse())
|
||||
return false;
|
||||
|
||||
// This truncate is used only once, thus if we can get through, it will become
|
||||
// useless.
|
||||
if (isa<TruncInst>(Inst))
|
||||
return true;
|
||||
|
||||
// If both operands are not constant, a new sext will be created here.
|
||||
// Current heuristic is: each step should be profitable.
|
||||
// Therefore we don't allow increasing the number of sexts even if it may
|
||||
// be profitable later on.
|
||||
if (isa<BinaryOperator>(Inst) && isa<ConstantInt>(Inst->getOperand(1)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
|
||||
if (isa<SelectInst>(Inst) && OpIdx == 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
|
||||
if (SExt->getType() != ConsideredSExtType)
|
||||
return false;
|
||||
|
||||
for (Value::const_use_iterator UseIt = SExt->use_begin(),
|
||||
EndUseIt = SExt->use_end();
|
||||
UseIt != EndUseIt; ++UseIt) {
|
||||
if (isa<GetElementPtrInst>(*UseIt))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Input:
|
||||
// - SExtInsts contains all the sext instructions that are used directly in
|
||||
// GetElementPtrInst, i.e., access to memory.
|
||||
// Algorithm:
|
||||
// - For each sext operation in SExtInsts:
|
||||
// Let var be the operand of sext.
|
||||
// while it is profitable (see shouldGetThrough), legal, and safe
|
||||
// (see canGetThrough) to move sext through var's definition:
|
||||
// * promote the type of var's definition.
|
||||
// * fold var into sext uses.
|
||||
// * move sext above var's definition.
|
||||
// * update sext operand to use the operand of var that should be sign
|
||||
// extended (by construction there is only one).
|
||||
//
|
||||
// E.g.,
|
||||
// a = ... i32 c, 3
|
||||
// b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a'
|
||||
// ...
|
||||
// = b
|
||||
// => Yes, update the code
|
||||
// b = sext i32 c to i64
|
||||
// a = ... i64 b, 3
|
||||
// ...
|
||||
// = a
|
||||
// Iterate on 'c'.
|
||||
bool
|
||||
ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
|
||||
DEBUG(dbgs() << "*** Propagate Sign Extension ***\n");
|
||||
|
||||
bool LocalChange = false;
|
||||
SetOfInstructions ToRemove;
|
||||
ValueToInsts ValToSExtendedUses;
|
||||
while (!SExtInsts.empty()) {
|
||||
// Get through simple chain.
|
||||
Instruction *SExt = SExtInsts.pop_back_val();
|
||||
|
||||
DEBUG(dbgs() << "Consider:\n" << *SExt << '\n');
|
||||
|
||||
// If this SExt has already been merged continue.
|
||||
if (SExt->use_empty() && ToRemove.count(SExt)) {
|
||||
DEBUG(dbgs() << "No uses => marked as delete\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Now try to get through the chain of definitions.
|
||||
while (isa<Instruction>(SExt->getOperand(0))) {
|
||||
Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0));
|
||||
DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
|
||||
if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
|
||||
// We cannot get through something that is not an Instruction
|
||||
// or not safe to SExt.
|
||||
DEBUG(dbgs() << "Cannot get through\n");
|
||||
break;
|
||||
}
|
||||
|
||||
LocalChange = true;
|
||||
// If this is a sign extend, it becomes useless.
|
||||
if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) {
|
||||
DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n");
|
||||
// We cannot use replaceAllUsesWith here because we may trigger some
|
||||
// assertion on the type, as all involved sext operations may not have
// been moved yet.
|
||||
while (!Inst->use_empty()) {
|
||||
Value::use_iterator UseIt = Inst->use_begin();
|
||||
Instruction *UseInst = dyn_cast<Instruction>(*UseIt);
|
||||
assert(UseInst && "Use of sext is not an Instruction!");
|
||||
UseInst->setOperand(UseIt->getOperandNo(), SExt);
|
||||
}
|
||||
ToRemove.insert(Inst);
|
||||
SExt->setOperand(0, Inst->getOperand(0));
|
||||
SExt->moveBefore(Inst);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get through the Instruction:
|
||||
// 1. Update its type.
|
||||
// 2. Replace the uses of SExt by Inst.
|
||||
// 3. Sign extend each operand that needs to be sign extended.
|
||||
|
||||
// Step #1.
|
||||
Inst->mutateType(SExt->getType());
|
||||
// Step #2.
|
||||
SExt->replaceAllUsesWith(Inst);
|
||||
// Step #3.
|
||||
Instruction *SExtForOpnd = SExt;
|
||||
|
||||
DEBUG(dbgs() << "Propagate SExt to operands\n");
|
||||
for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx;
|
||||
++OpIdx) {
|
||||
DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n');
|
||||
if (Inst->getOperand(OpIdx)->getType() == SExt->getType() ||
|
||||
!shouldSExtOperand(Inst, OpIdx)) {
|
||||
DEBUG(dbgs() << "No need to propagate\n");
|
||||
continue;
|
||||
}
|
||||
// Check if we can statically sign extend the operand.
|
||||
Value *Opnd = Inst->getOperand(OpIdx);
|
||||
if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
|
||||
DEBUG(dbgs() << "Statically sign extend\n");
|
||||
Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(),
|
||||
Cst->getSExtValue()));
|
||||
continue;
|
||||
}
|
||||
// UndefValue are typed, so we have to statically sign extend them.
|
||||
if (isa<UndefValue>(Opnd)) {
|
||||
DEBUG(dbgs() << "Statically sign extend\n");
|
||||
Inst->setOperand(OpIdx, UndefValue::get(SExt->getType()));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise we have to explicitly sign extend it.
|
||||
assert(SExtForOpnd &&
|
||||
"Only one operand should have been sign extended");
|
||||
|
||||
SExtForOpnd->setOperand(0, Opnd);
|
||||
|
||||
DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n");
|
||||
// Move the sign extension before the insertion point.
|
||||
SExtForOpnd->moveBefore(Inst);
|
||||
Inst->setOperand(OpIdx, SExtForOpnd);
|
||||
// If more sext are required, new instructions will have to be created.
|
||||
SExtForOpnd = NULL;
|
||||
}
|
||||
if (SExtForOpnd == SExt) {
|
||||
DEBUG(dbgs() << "Sign extension is useless now\n");
|
||||
ToRemove.insert(SExt);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the use is already of the right type, connect its uses to its argument
|
||||
// and delete it.
|
||||
// This can happen for an Instruction all of whose uses are sign extended.
|
||||
if (!ToRemove.count(SExt) &&
|
||||
SExt->getType() == SExt->getOperand(0)->getType()) {
|
||||
DEBUG(dbgs() << "Sign extension is useless, attach its use to "
|
||||
"its argument\n");
|
||||
SExt->replaceAllUsesWith(SExt->getOperand(0));
|
||||
ToRemove.insert(SExt);
|
||||
} else
|
||||
ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt);
|
||||
}
|
||||
|
||||
if (EnableMerge)
|
||||
mergeSExts(ValToSExtendedUses, ToRemove);
|
||||
|
||||
// Remove all instructions marked as ToRemove.
|
||||
for (SetOfInstructions::iterator ToRemoveIt = ToRemove.begin(),
|
||||
EndToRemoveIt = ToRemove.end();
|
||||
ToRemoveIt != EndToRemoveIt; ++ToRemoveIt)
|
||||
(*ToRemoveIt)->eraseFromParent();
|
||||
return LocalChange;
|
||||
}
|
||||
|
||||
void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
|
||||
SetOfInstructions &ToRemove) {
|
||||
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
|
||||
for (ValueToInsts::iterator It = ValToSExtendedUses.begin(),
|
||||
EndIt = ValToSExtendedUses.end();
|
||||
It != EndIt; ++It) {
|
||||
Instructions &Insts = It->second;
|
||||
Instructions CurPts;
|
||||
for (Instructions::iterator IIt = Insts.begin(), EndIIt = Insts.end();
|
||||
IIt != EndIIt; ++IIt) {
|
||||
if (ToRemove.count(*IIt))
|
||||
continue;
|
||||
bool inserted = false;
|
||||
for (Instructions::iterator CurPtsIt = CurPts.begin(),
|
||||
EndCurPtsIt = CurPts.end();
|
||||
CurPtsIt != EndCurPtsIt; ++CurPtsIt) {
|
||||
if (DT.dominates(*IIt, *CurPtsIt)) {
|
||||
DEBUG(dbgs() << "Replace all uses of:\n" << **CurPtsIt << "\nwith:\n"
|
||||
<< **IIt << '\n');
|
||||
(*CurPtsIt)->replaceAllUsesWith(*IIt);
|
||||
ToRemove.insert(*CurPtsIt);
|
||||
*CurPtsIt = *IIt;
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
if (!DT.dominates(*CurPtsIt, *IIt))
|
||||
// Give up if we need to merge in a common dominator as the
|
||||
// experiments show it is not profitable.
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "Replace all uses of:\n" << **IIt << "\nwith:\n"
|
||||
<< **CurPtsIt << '\n');
|
||||
(*IIt)->replaceAllUsesWith(*CurPtsIt);
|
||||
ToRemove.insert(*IIt);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
if (!inserted)
|
||||
CurPts.push_back(*IIt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
|
||||
DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n");
|
||||
|
||||
DenseMap<Value *, Instruction *> SeenChains;
|
||||
|
||||
for (Function::iterator IBB = Func->begin(), IEndBB = Func->end();
|
||||
IBB != IEndBB; ++IBB) {
|
||||
for (BasicBlock::iterator II = IBB->begin(), IEndI = IBB->end();
|
||||
II != IEndI; ++II) {
|
||||
|
||||
// Collect all sext operation per type.
|
||||
if (!isa<SExtInst>(II) || !shouldConsiderSExt(II))
|
||||
continue;
|
||||
Instruction *SExt = II;
|
||||
|
||||
DEBUG(dbgs() << "Found:\n" << (*II) << '\n');
|
||||
|
||||
// Cases where we actually perform the optimization:
|
||||
// 1. SExt is used in a getelementptr with more than 2 operands =>
|
||||
// likely we can merge some computation if they are done on 64 bits.
|
||||
// 2. The beginning of the SExt chain is sign extended several times. =>
|
||||
// code sharing is possible.
|
||||
|
||||
bool insert = false;
|
||||
// #1.
|
||||
for (Value::use_iterator UseIt = SExt->use_begin(),
|
||||
EndUseIt = SExt->use_end();
|
||||
UseIt != EndUseIt; ++UseIt) {
|
||||
const Instruction *Inst = dyn_cast<GetElementPtrInst>(*UseIt);
|
||||
if (Inst && Inst->getNumOperands() > 2) {
|
||||
DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
|
||||
<< '\n');
|
||||
insert = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// #2.
|
||||
// Check the head of the chain.
|
||||
Instruction *Inst = SExt;
|
||||
Value *Last;
|
||||
do {
|
||||
int OpdIdx = 0;
|
||||
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
|
||||
if (BinOp && isa<ConstantInt>(BinOp->getOperand(0)))
|
||||
OpdIdx = 1;
|
||||
Last = Inst->getOperand(OpdIdx);
|
||||
Inst = dyn_cast<Instruction>(Last);
|
||||
} while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst));
|
||||
|
||||
DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n');
|
||||
DenseMap<Value *, Instruction *>::iterator AlreadySeen =
|
||||
SeenChains.find(Last);
|
||||
if (insert || AlreadySeen != SeenChains.end()) {
|
||||
DEBUG(dbgs() << "Insert\n");
|
||||
SExtInsts.push_back(II);
|
||||
if (AlreadySeen != SeenChains.end() && AlreadySeen->second != NULL) {
|
||||
DEBUG(dbgs() << "Insert chain member\n");
|
||||
SExtInsts.push_back(AlreadySeen->second);
|
||||
SeenChains[Last] = NULL;
|
||||
}
|
||||
} else {
|
||||
DEBUG(dbgs() << "Record its chain membership\n");
|
||||
SeenChains[Last] = SExt;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool ARM64AddressTypePromotion::runOnFunction(Function &F) {
|
||||
if (!EnableAddressTypePromotion || F.isDeclaration())
|
||||
return false;
|
||||
Func = &F;
|
||||
ConsideredSExtType = Type::getInt64Ty(Func->getContext());
|
||||
|
||||
DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n');
|
||||
|
||||
Instructions SExtInsts;
|
||||
analyzeSExtension(SExtInsts);
|
||||
return propagateSignExtension(SExtInsts);
|
||||
}
392	lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp	Normal file
@ -0,0 +1,392 @@
|
||||
//===-- ARM64AdvSIMDScalarPass.cpp - Use AdvSIMD scalar instructions -----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// When profitable, replace GPR targeting i64 instructions with their
|
||||
// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined
|
||||
// as minimizing the number of cross-class register copies.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TODO: Graph based predicate heuristics.
|
||||
// Walking the instruction list linearly will get many, perhaps most, of
|
||||
// the cases, but to do a truly thorough job of this, we need a more
// holistic approach.
|
||||
//
|
||||
// This optimization is very similar in spirit to the register allocator's
|
||||
// spill placement, only here we're determining where to place cross-class
|
||||
// register copies rather than spills. As such, a similar approach is
|
||||
// called for.
|
||||
//
|
||||
// We want to build up a set of graphs of all instructions which are candidates
|
||||
// for transformation along with instructions which generate their inputs and
|
||||
// consume their outputs. For each edge in the graph, we assign a weight
|
||||
// based on whether there is a copy required there (weight zero if not) and
|
||||
// the block frequency of the block containing the defining or using
|
||||
// instruction, whichever is less. Our optimization is then a graph problem
|
||||
// to minimize the total weight of all the graphs, then transform instructions
|
||||
// and add or remove copy instructions as called for to implement the
|
||||
// solution.
|
||||
//===----------------------------------------------------------------------===//
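Compared with the graph formulation sketched in this TODO, the heuristic actually implemented below (see isProfitableToTransform) is deliberately simple: count the cross-class copies a transform would add against the ones it would make removable, and only transform when the balance does not get worse. Distilled to its arithmetic (a restatement for illustration, not LLVM API):

// Restatement of the profitability test used by isProfitableToTransform():
// transform when we don't add more copies than we can remove, or when the
// -arm64-simd-scalar-force-all stress-testing flag forces every candidate.
static bool worthTransforming(unsigned NumNewCopies,
                              unsigned NumRemovableCopies, bool ForceAll) {
  return NumNewCopies <= NumRemovableCopies || ForceAll;
}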
|
||||
|
||||
#define DEBUG_TYPE "arm64-simd-scalar"
|
||||
#include "ARM64.h"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "ARM64RegisterInfo.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool>
|
||||
AdvSIMDScalar("arm64-simd-scalar",
|
||||
cl::desc("enable use of AdvSIMD scalar integer instructions"),
|
||||
cl::init(false), cl::Hidden);
|
||||
// Allow forcing all i64 operations with equivalent SIMD instructions to use
|
||||
// them. For stress-testing the transformation function.
|
||||
static cl::opt<bool>
|
||||
TransformAll("arm64-simd-scalar-force-all",
|
||||
cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
|
||||
STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
|
||||
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
|
||||
|
||||
namespace {
|
||||
class ARM64AdvSIMDScalar : public MachineFunctionPass {
|
||||
MachineRegisterInfo *MRI;
|
||||
const ARM64InstrInfo *TII;
|
||||
|
||||
private:
|
||||
// isProfitableToTransform - Predicate function to determine whether an
|
||||
// instruction should be transformed to its equivalent AdvSIMD scalar
|
||||
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
|
||||
bool isProfitableToTransform(const MachineInstr *MI) const;
|
||||
|
||||
// transformInstruction - Perform the transformation of an instruction
// to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
|
||||
// to be the correct register class, minimizing cross-class copies.
|
||||
void transformInstruction(MachineInstr *MI);
|
||||
|
||||
// processMachineBasicBlock - Main optimization loop.
|
||||
bool processMachineBasicBlock(MachineBasicBlock *MBB);
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid.
|
||||
explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &F);
|
||||
|
||||
const char *getPassName() const {
|
||||
return "AdvSIMD scalar operation optimization";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
char ARM64AdvSIMDScalar::ID = 0;
|
||||
} // end anonymous namespace
|
||||
|
||||
static bool isGPR64(unsigned Reg, unsigned SubReg,
|
||||
const MachineRegisterInfo *MRI) {
|
||||
if (SubReg)
|
||||
return false;
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg))
|
||||
return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass);
|
||||
return ARM64::GPR64RegClass.contains(Reg);
|
||||
}
|
||||
|
||||
static bool isFPR64(unsigned Reg, unsigned SubReg,
|
||||
const MachineRegisterInfo *MRI) {
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg))
|
||||
return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) &&
|
||||
SubReg == 0) ||
|
||||
(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) &&
|
||||
SubReg == ARM64::dsub);
|
||||
// Physical register references just check the register class directly.
|
||||
return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
|
||||
(ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub);
|
||||
}
|
||||
|
||||
// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
|
||||
// copy instruction. Return zero_reg if the instruction is not a copy.
|
||||
static unsigned getSrcFromCopy(const MachineInstr *MI,
|
||||
const MachineRegisterInfo *MRI,
|
||||
unsigned &SubReg) {
|
||||
SubReg = 0;
|
||||
// The "FMOV Xd, Dn" instruction is the typical form.
|
||||
if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr)
|
||||
return MI->getOperand(1).getReg();
|
||||
// A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see
|
||||
// these at this stage, but it's easy to check for.
|
||||
if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) {
|
||||
SubReg = ARM64::dsub;
|
||||
return MI->getOperand(1).getReg();
|
||||
}
|
||||
// Or just a plain COPY instruction. This can be directly to/from FPR64,
|
||||
// or it can be a dsub subreg reference to an FPR128.
|
||||
if (MI->getOpcode() == ARM64::COPY) {
|
||||
if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
|
||||
MRI) &&
|
||||
isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI))
|
||||
return MI->getOperand(1).getReg();
|
||||
if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
|
||||
MRI) &&
|
||||
isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(),
|
||||
MRI)) {
|
||||
SubReg = ARM64::dsub;
|
||||
return MI->getOperand(1).getReg();
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, this is some other kind of instruction.
|
||||
return 0;
|
||||
}
|
||||
|
||||
// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
|
||||
// that we're considering transforming to, return that AdvSIMD opcode. For all
|
||||
// others, return the original opcode.
|
||||
static int getTransformOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
break;
|
||||
// FIXME: Lots more possibilities.
|
||||
case ARM64::ADDXrr:
|
||||
return ARM64::ADDv1i64;
|
||||
case ARM64::SUBXrr:
|
||||
return ARM64::SUBv1i64;
|
||||
}
|
||||
// No AdvSIMD equivalent, so just return the original opcode.
|
||||
return Opc;
|
||||
}
|
||||
|
||||
static bool isTransformable(const MachineInstr *MI) {
|
||||
int Opc = MI->getOpcode();
|
||||
return Opc != getTransformOpcode(Opc);
|
||||
}
|
||||
|
||||
// isProfitableToTransform - Predicate function to determine whether an
|
||||
// instruction should be transformed to its equivalent AdvSIMD scalar
|
||||
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
|
||||
bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
|
||||
// If this instruction isn't eligible to be transformed (no SIMD equivalent),
|
||||
// early exit since that's the common case.
|
||||
if (!isTransformable(MI))
|
||||
return false;
|
||||
|
||||
// Count the number of copies we'll need to add and approximate the number
|
||||
// of copies that a transform will enable us to remove.
|
||||
unsigned NumNewCopies = 3;
|
||||
unsigned NumRemovableCopies = 0;
|
||||
|
||||
unsigned OrigSrc0 = MI->getOperand(1).getReg();
|
||||
unsigned OrigSrc1 = MI->getOperand(2).getReg();
|
||||
unsigned Src0 = 0, SubReg0;
|
||||
unsigned Src1 = 0, SubReg1;
|
||||
if (!MRI->def_empty(OrigSrc0)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc0);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
|
||||
// If the source was from a copy, we don't need to insert a new copy.
|
||||
if (Src0)
|
||||
--NumNewCopies;
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0))
|
||||
++NumRemovableCopies;
|
||||
}
|
||||
if (!MRI->def_empty(OrigSrc1)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc1);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
|
||||
if (Src1)
|
||||
--NumNewCopies;
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1))
|
||||
++NumRemovableCopies;
|
||||
}
|
||||
|
||||
// If any of the uses of the original instructions is a cross class copy,
|
||||
// that's a copy that will be removable if we transform. Likewise, if
|
||||
// any of the uses is a transformable instruction, it's likely the transforms
|
||||
// will chain, enabling us to save a copy there, too. This is an aggressive
|
||||
// heuristic that approximates the graph based cost analysis described above.
|
||||
unsigned Dst = MI->getOperand(0).getReg();
|
||||
bool AllUsesAreCopies = true;
|
||||
for (MachineRegisterInfo::use_instr_nodbg_iterator
|
||||
Use = MRI->use_instr_nodbg_begin(Dst),
|
||||
E = MRI->use_instr_nodbg_end();
|
||||
Use != E; ++Use) {
|
||||
unsigned SubReg;
|
||||
if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use))
|
||||
++NumRemovableCopies;
|
||||
// If the use is an INSERT_SUBREG, that's still something that can
|
||||
// directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's
|
||||
// preferable to have it use the FPR64 in most cases, as if the source
|
||||
// vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely.
|
||||
// Ditto for a lane insert.
|
||||
else if (Use->getOpcode() == ARM64::INSERT_SUBREG ||
|
||||
Use->getOpcode() == ARM64::INSvi64gpr)
|
||||
;
|
||||
else
|
||||
AllUsesAreCopies = false;
|
||||
}
|
||||
// If all of the uses of the original destination register are copies to
|
||||
// FPR64, then we won't end up having a new copy back to GPR64 either.
|
||||
if (AllUsesAreCopies)
|
||||
--NumNewCopies;
|
||||
|
||||
// If a transform will not increase the number of cross-class copies required,
|
||||
// return true.
|
||||
if (NumNewCopies <= NumRemovableCopies)
|
||||
return true;
|
||||
|
||||
// Finally, even if we otherwise wouldn't transform, check if we're forcing
|
||||
// transformation of everything.
|
||||
return TransformAll;
|
||||
}
|
||||
|
||||
static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI,
|
||||
unsigned Dst, unsigned Src, bool IsKill) {
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY),
|
||||
Dst)
|
||||
.addReg(Src, getKillRegState(IsKill));
|
||||
DEBUG(dbgs() << " adding copy: " << *MIB);
|
||||
++NumCopiesInserted;
|
||||
return MIB;
|
||||
}
|
||||
|
||||
// transformInstruction - Perform the transformation of an instruction
// to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
|
||||
// to be the correct register class, minimizing cross-class copies.
|
||||
void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
|
||||
DEBUG(dbgs() << "Scalar transform: " << *MI);
|
||||
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
int OldOpc = MI->getOpcode();
|
||||
int NewOpc = getTransformOpcode(OldOpc);
|
||||
assert(OldOpc != NewOpc && "transform an instruction to itself?!");
|
||||
|
||||
// Check if we need a copy for the source registers.
|
||||
unsigned OrigSrc0 = MI->getOperand(1).getReg();
|
||||
unsigned OrigSrc1 = MI->getOperand(2).getReg();
|
||||
unsigned Src0 = 0, SubReg0;
|
||||
unsigned Src1 = 0, SubReg1;
|
||||
if (!MRI->def_empty(OrigSrc0)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc0);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) {
|
||||
assert(Src0 && "Can't delete copy w/o a valid original source!");
|
||||
Def->eraseFromParent();
|
||||
++NumCopiesDeleted;
|
||||
}
|
||||
}
|
||||
if (!MRI->def_empty(OrigSrc1)) {
|
||||
MachineRegisterInfo::def_instr_iterator Def =
|
||||
MRI->def_instr_begin(OrigSrc1);
|
||||
assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
|
||||
Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
|
||||
// If there are no other users of the original source, we can delete
|
||||
// that instruction.
|
||||
if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) {
|
||||
assert(Src1 && "Can't delete copy w/o a valid original source!");
|
||||
Def->eraseFromParent();
|
||||
++NumCopiesDeleted;
|
||||
}
|
||||
}
|
||||
// If we weren't able to reference the original source directly, create a
|
||||
// copy.
|
||||
if (!Src0) {
|
||||
SubReg0 = 0;
|
||||
Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
|
||||
insertCopy(TII, MI, Src0, OrigSrc0, true);
|
||||
}
|
||||
if (!Src1) {
|
||||
SubReg1 = 0;
|
||||
Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
|
||||
insertCopy(TII, MI, Src1, OrigSrc1, true);
|
||||
}
|
||||
|
||||
// Create a vreg for the destination.
|
||||
// FIXME: No need to do this if the ultimate user expects an FPR64.
|
||||
// Check for that and avoid the copy if possible.
|
||||
unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
|
||||
|
||||
// For now, all of the new instructions have the same simple three-register
|
||||
// form, so no need to special case based on what instruction we're
|
||||
// building.
|
||||
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst)
|
||||
.addReg(Src0, getKillRegState(true), SubReg0)
|
||||
.addReg(Src1, getKillRegState(true), SubReg1);
|
||||
|
||||
// Now copy the result back out to a GPR.
|
||||
// FIXME: Try to avoid this if all uses could actually just use the FPR64
|
||||
// directly.
|
||||
insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true);
|
||||
|
||||
// Erase the old instruction.
|
||||
MI->eraseFromParent();
|
||||
|
||||
++NumScalarInsnsUsed;
|
||||
}
|
||||
|
||||
// processMachineBasicBlock - Main optimization loop.
|
||||
bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
|
||||
bool Changed = false;
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
|
||||
MachineInstr *MI = I;
|
||||
++I;
|
||||
if (isProfitableToTransform(MI)) {
|
||||
transformInstruction(MI);
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// runOnMachineFunction - Pass entry point from PassManager.
|
||||
bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
|
||||
// Early exit if pass disabled.
|
||||
if (!AdvSIMDScalar)
|
||||
return false;
|
||||
|
||||
bool Changed = false;
|
||||
DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");
|
||||
|
||||
const TargetMachine &TM = mf.getTarget();
|
||||
MRI = &mf.getRegInfo();
|
||||
TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
|
||||
|
||||
// Just check things on a one-block-at-a-time basis.
|
||||
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
|
||||
if (processMachineBasicBlock(I))
|
||||
Changed = true;
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine
|
||||
// to add the pass to the PassManager.
|
||||
FunctionPass *llvm::createARM64AdvSIMDScalar() {
|
||||
return new ARM64AdvSIMDScalar();
|
||||
}
573	lib/Target/ARM64/ARM64AsmPrinter.cpp	Normal file
@ -0,0 +1,573 @@
|
||||
//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a printer that converts from our internal representation
|
||||
// of machine-dependent LLVM code to the ARM64 assembly language.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "asm-printer"
|
||||
#include "ARM64.h"
|
||||
#include "ARM64MachineFunctionInfo.h"
|
||||
#include "ARM64MCInstLower.h"
|
||||
#include "ARM64RegisterInfo.h"
|
||||
#include "InstPrinter/ARM64InstPrinter.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/StackMaps.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DebugInfo.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstBuilder.h"
|
||||
#include "llvm/MC/MCLinkerOptimizationHint.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class ARM64AsmPrinter : public AsmPrinter {
|
||||
ARM64MCInstLower MCInstLowering;
|
||||
StackMaps SM;
|
||||
|
||||
public:
|
||||
ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
||||
: AsmPrinter(TM, Streamer), MCInstLowering(OutContext, *Mang, *this),
|
||||
SM(*this), ARM64FI(NULL), LOHLabelCounter(0) {}
|
||||
|
||||
virtual const char *getPassName() const { return "ARM64 Assembly Printer"; }
|
||||
|
||||
/// \brief Wrapper for MCInstLowering.lowerOperand() for the
|
||||
/// tblgen'erated pseudo lowering.
|
||||
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
|
||||
return MCInstLowering.lowerOperand(MO, MCOp);
|
||||
}
|
||||
|
||||
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
|
||||
const MachineInstr &MI);
|
||||
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
|
||||
const MachineInstr &MI);
|
||||
/// \brief tblgen'erated driver function for lowering simple MI->MC
|
||||
/// pseudo instructions.
|
||||
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
|
||||
const MachineInstr *MI);
|
||||
|
||||
void EmitInstruction(const MachineInstr *MI);
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AsmPrinter::getAnalysisUsage(AU);
|
||||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) {
|
||||
ARM64FI = F.getInfo<ARM64FunctionInfo>();
|
||||
return AsmPrinter::runOnMachineFunction(F);
|
||||
}
|
||||
|
||||
private:
|
||||
MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
|
||||
void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
|
||||
bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
|
||||
bool printAsmRegInClass(const MachineOperand &MO,
|
||||
const TargetRegisterClass *RC, bool isVector,
|
||||
raw_ostream &O);
|
||||
|
||||
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
unsigned AsmVariant, const char *ExtraCode,
|
||||
raw_ostream &O);
|
||||
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
unsigned AsmVariant, const char *ExtraCode,
|
||||
raw_ostream &O);
|
||||
|
||||
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
|
||||
|
||||
void EmitFunctionBodyEnd();
|
||||
|
||||
MCSymbol *GetCPISymbol(unsigned CPID) const;
|
||||
void EmitEndOfAsmFile(Module &M);
|
||||
ARM64FunctionInfo *ARM64FI;
|
||||
|
||||
/// \brief Emit the LOHs contained in ARM64FI.
|
||||
void EmitLOHs();
|
||||
|
||||
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
|
||||
MInstToMCSymbol LOHInstToLabel;
|
||||
unsigned LOHLabelCounter;
|
||||
};
|
||||
|
||||
} // end of anonymous namespace
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) {
|
||||
// Funny Darwin hack: This flag tells the linker that no global symbols
|
||||
// contain code that falls through to other global symbols (e.g. the obvious
|
||||
// implementation of multiple entry points). If this doesn't occur, the
|
||||
// linker can safely perform dead code stripping. Since LLVM never
|
||||
// generates code that does this, it is always safe to set.
|
||||
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
|
||||
SM.serializeToStackMapSection();
|
||||
}
|
||||
|
||||
MachineLocation
|
||||
ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
|
||||
MachineLocation Location;
|
||||
assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
|
||||
// Frame address. Currently handles register +- offset only.
|
||||
if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
|
||||
Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
|
||||
else {
|
||||
DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
|
||||
}
|
||||
return Location;
|
||||
}
|
||||
|
||||
void ARM64AsmPrinter::EmitLOHs() {
|
||||
const ARM64FunctionInfo::MILOHDirectives &LOHs =
|
||||
const_cast<const ARM64FunctionInfo *>(ARM64FI)
|
||||
->getLOHContainer()
|
||||
.getDirectives();
|
||||
SmallVector<MCSymbol *, 3> MCArgs;
|
||||
|
||||
for (ARM64FunctionInfo::MILOHDirectives::const_iterator It = LOHs.begin(),
|
||||
EndIt = LOHs.end();
|
||||
It != EndIt; ++It) {
|
||||
const ARM64FunctionInfo::MILOHArgs &MIArgs = It->getArgs();
|
||||
for (ARM64FunctionInfo::MILOHArgs::const_iterator
|
||||
MIArgsIt = MIArgs.begin(),
|
||||
EndMIArgsIt = MIArgs.end();
|
||||
MIArgsIt != EndMIArgsIt; ++MIArgsIt) {
|
||||
MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(*MIArgsIt);
|
||||
assert(LabelIt != LOHInstToLabel.end() &&
|
||||
"Label hasn't been inserted for LOH related instruction");
|
||||
MCArgs.push_back(LabelIt->second);
|
||||
}
|
||||
OutStreamer.EmitLOHDirective(It->getKind(), MCArgs);
|
||||
MCArgs.clear();
|
||||
}
|
||||
}
|
||||
|
||||
void ARM64AsmPrinter::EmitFunctionBodyEnd() {
|
||||
if (!ARM64FI->getLOHRelated().empty())
|
||||
EmitLOHs();
|
||||
}
|
||||
|
||||
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
|
||||
MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const {
|
||||
// Darwin uses a linker-private symbol name for constant-pools (to
|
||||
// avoid addends on the relocation?), ELF has no such concept and
|
||||
// uses a normal private symbol.
|
||||
if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
|
||||
return OutContext.GetOrCreateSymbol(
|
||||
Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
|
||||
Twine(getFunctionNumber()) + "_" + Twine(CPID));
|
||||
|
||||
return OutContext.GetOrCreateSymbol(
|
||||
Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
|
||||
Twine(getFunctionNumber()) + "_" + Twine(CPID));
|
||||
}
|
||||
|
||||
void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
raw_ostream &O) {
|
||||
const MachineOperand &MO = MI->getOperand(OpNum);
|
||||
switch (MO.getType()) {
|
||||
default:
|
||||
assert(0 && "<unknown operand type>");
|
||||
case MachineOperand::MO_Register: {
|
||||
unsigned Reg = MO.getReg();
|
||||
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
|
||||
assert(!MO.getSubReg() && "Subregs should be eliminated!");
|
||||
O << ARM64InstPrinter::getRegisterName(Reg);
|
||||
break;
|
||||
}
|
||||
case MachineOperand::MO_Immediate: {
|
||||
int64_t Imm = MO.getImm();
|
||||
O << '#' << Imm;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
|
||||
raw_ostream &O) {
|
||||
unsigned Reg = MO.getReg();
|
||||
switch (Mode) {
|
||||
default:
|
||||
return true; // Unknown mode.
|
||||
case 'w':
|
||||
Reg = getWRegFromXReg(Reg);
|
||||
break;
|
||||
case 'x':
|
||||
Reg = getXRegFromWReg(Reg);
|
||||
break;
|
||||
}
|
||||
|
||||
O << ARM64InstPrinter::getRegisterName(Reg);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Prints the register in MO using class RC, mapping the register's encoding
// onto the new register class. This should not be used for cross-class
// printing.
|
||||
bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
|
||||
const TargetRegisterClass *RC,
|
||||
bool isVector, raw_ostream &O) {
|
||||
assert(MO.isReg() && "Should only get here with a register!");
|
||||
const ARM64RegisterInfo *RI =
|
||||
static_cast<const ARM64RegisterInfo *>(TM.getRegisterInfo());
|
||||
unsigned Reg = MO.getReg();
|
||||
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
|
||||
assert(RI->regsOverlap(RegToPrint, Reg));
|
||||
O << ARM64InstPrinter::getRegisterName(
|
||||
RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
unsigned AsmVariant,
|
||||
const char *ExtraCode, raw_ostream &O) {
|
||||
const MachineOperand &MO = MI->getOperand(OpNum);
|
||||
// Does this asm operand have a single letter operand modifier?
|
||||
if (ExtraCode && ExtraCode[0]) {
|
||||
if (ExtraCode[1] != 0)
|
||||
return true; // Unknown modifier.
|
||||
|
||||
switch (ExtraCode[0]) {
|
||||
default:
|
||||
return true; // Unknown modifier.
|
||||
case 'w': // Print W register
|
||||
case 'x': // Print X register
|
||||
if (MO.isReg())
|
||||
return printAsmMRegister(MO, ExtraCode[0], O);
|
||||
if (MO.isImm() && MO.getImm() == 0) {
|
||||
unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR;
|
||||
O << ARM64InstPrinter::getRegisterName(Reg);
|
||||
return false;
|
||||
}
|
||||
printOperand(MI, OpNum, O);
|
||||
return false;
|
||||
case 'b': // Print B register.
|
||||
case 'h': // Print H register.
|
||||
case 's': // Print S register.
|
||||
case 'd': // Print D register.
|
||||
case 'q': // Print Q register.
|
||||
if (MO.isReg()) {
|
||||
const TargetRegisterClass *RC;
|
||||
switch (ExtraCode[0]) {
|
||||
case 'b':
|
||||
RC = &ARM64::FPR8RegClass;
|
||||
break;
|
||||
case 'h':
|
||||
RC = &ARM64::FPR16RegClass;
|
||||
break;
|
||||
case 's':
|
||||
RC = &ARM64::FPR32RegClass;
|
||||
break;
|
||||
case 'd':
|
||||
RC = &ARM64::FPR64RegClass;
|
||||
break;
|
||||
case 'q':
|
||||
RC = &ARM64::FPR128RegClass;
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
return printAsmRegInClass(MO, RC, false /* vector */, O);
|
||||
}
|
||||
printOperand(MI, OpNum, O);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// According to ARM, we should emit x and v registers unless we have a
|
||||
// modifier.
|
||||
if (MO.isReg()) {
|
||||
unsigned Reg = MO.getReg();
|
||||
|
||||
// If this is a w or x register, print an x register.
|
||||
if (ARM64::GPR32allRegClass.contains(Reg) ||
|
||||
ARM64::GPR64allRegClass.contains(Reg))
|
||||
return printAsmMRegister(MO, 'x', O);
|
||||
|
||||
// If this is a b, h, s, d, or q register, print it as a v register.
|
||||
return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O);
|
||||
}
|
||||
|
||||
printOperand(MI, OpNum, O);
|
||||
return false;
|
||||
}
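The modifier letters handled above ('w', 'x', 'b', 'h', 's', 'd', 'q') are what user inline assembly passes through to this printer. An illustrative C/C++ example of the 'w' modifier (ordinary GCC/Clang inline-asm syntax, not part of this commit):

// %w0/%w1/%w2 ask the asm printer for the 32-bit names of the registers
// holding the int operands; with no modifier, the X register name is printed.
static inline int add32(int a, int b) {
  int r;
  __asm__("add %w0, %w1, %w2" : "=r"(r) : "r"(a), "r"(b));
  return r;
}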
|
||||
|
||||
bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
|
||||
unsigned OpNum, unsigned AsmVariant,
|
||||
const char *ExtraCode,
|
||||
raw_ostream &O) {
|
||||
if (ExtraCode && ExtraCode[0])
|
||||
return true; // Unknown modifier.
|
||||
|
||||
const MachineOperand &MO = MI->getOperand(OpNum);
|
||||
assert(MO.isReg() && "unexpected inline asm memory operand");
|
||||
O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]";
|
||||
return false;
|
||||
}
|
||||
|
||||
void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
|
||||
raw_ostream &OS) {
|
||||
unsigned NOps = MI->getNumOperands();
|
||||
assert(NOps == 4);
|
||||
OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
|
||||
// cast away const; DIVariable etc. do not take const operands for some reason.
|
||||
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
|
||||
OS << V.getName();
|
||||
OS << " <- ";
|
||||
// Frame address. Currently handles register +- offset only.
|
||||
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
|
||||
OS << '[';
|
||||
printOperand(MI, 0, OS);
|
||||
OS << '+';
|
||||
printOperand(MI, 1, OS);
|
||||
OS << ']';
|
||||
OS << "+";
|
||||
printOperand(MI, NOps - 2, OS);
|
||||
}
|
||||
|
||||
void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
|
||||
const MachineInstr &MI) {
|
||||
unsigned NumNOPBytes = MI.getOperand(1).getImm();
|
||||
|
||||
SM.recordStackMap(MI);
|
||||
// Emit padding.
|
||||
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
|
||||
for (unsigned i = 0; i < NumNOPBytes; i += 4)
|
||||
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
|
||||
}
|
||||
|
||||
// Lower a patchpoint of the form:
|
||||
// [<def>], <id>, <numBytes>, <target>, <numArgs>
|
||||
void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
|
||||
const MachineInstr &MI) {
|
||||
SM.recordPatchPoint(MI);
|
||||
|
||||
PatchPointOpers Opers(&MI);
|
||||
|
||||
int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
|
||||
unsigned EncodedBytes = 0;
|
||||
if (CallTarget) {
|
||||
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
|
||||
"High 16 bits of call target should be zero.");
|
||||
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
|
||||
EncodedBytes = 16;
|
||||
// Materialize the jump address:
|
||||
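// The 48-bit target is built 16 bits at a time: MOVZ writes bits [47:32] and
// zeroes the rest, then the two MOVKs insert bits [31:16] and [15:0].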
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi)
|
||||
.addReg(ScratchReg)
|
||||
.addImm((CallTarget >> 32) & 0xFFFF)
|
||||
.addImm(32));
|
||||
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
|
||||
.addReg(ScratchReg)
|
||||
.addReg(ScratchReg)
|
||||
.addImm((CallTarget >> 16) & 0xFFFF)
|
||||
.addImm(16));
|
||||
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
|
||||
.addReg(ScratchReg)
|
||||
.addReg(ScratchReg)
|
||||
.addImm(CallTarget & 0xFFFF)
|
||||
.addImm(0));
|
||||
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg));
|
||||
}
|
||||
// Emit padding.
|
||||
unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
|
||||
assert(NumBytes >= EncodedBytes &&
|
||||
"Patchpoint can't request size less than the length of a call.");
|
||||
assert((NumBytes - EncodedBytes) % 4 == 0 &&
|
||||
"Invalid number of NOP bytes requested!");
|
||||
for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
|
||||
EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
|
||||
}
|
||||
|
||||
// Simple pseudo-instructions have their lowering (with expansion to real
|
||||
// instructions) auto-generated.
|
||||
#include "ARM64GenMCPseudoLowering.inc"
|
||||
|
||||
static unsigned getRealIndexedOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case ARM64::LDRXpre_isel: return ARM64::LDRXpre;
|
||||
case ARM64::LDRWpre_isel: return ARM64::LDRWpre;
|
||||
case ARM64::LDRDpre_isel: return ARM64::LDRDpre;
|
||||
case ARM64::LDRSpre_isel: return ARM64::LDRSpre;
|
||||
case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre;
|
||||
case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre;
|
||||
case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre;
|
||||
case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre;
|
||||
case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre;
|
||||
case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre;
|
||||
case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre;
|
||||
|
||||
case ARM64::LDRDpost_isel: return ARM64::LDRDpost;
|
||||
case ARM64::LDRSpost_isel: return ARM64::LDRSpost;
|
||||
case ARM64::LDRXpost_isel: return ARM64::LDRXpost;
|
||||
case ARM64::LDRWpost_isel: return ARM64::LDRWpost;
|
||||
case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost;
|
||||
case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost;
|
||||
case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost;
|
||||
case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost;
|
||||
case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost;
|
||||
case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost;
|
||||
case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost;
|
||||
|
||||
case ARM64::STRXpre_isel: return ARM64::STRXpre;
|
||||
case ARM64::STRWpre_isel: return ARM64::STRWpre;
|
||||
case ARM64::STRHHpre_isel: return ARM64::STRHHpre;
|
||||
case ARM64::STRBBpre_isel: return ARM64::STRBBpre;
|
||||
case ARM64::STRDpre_isel: return ARM64::STRDpre;
|
||||
case ARM64::STRSpre_isel: return ARM64::STRSpre;
|
||||
}
|
||||
llvm_unreachable("Unexpected pre-indexed opcode!");
|
||||
}
|
||||
|
||||
void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
// Do any auto-generated pseudo lowerings.
|
||||
if (emitPseudoExpansionLowering(OutStreamer, MI))
|
||||
return;
|
||||
|
||||
if (ARM64FI->getLOHRelated().count(MI)) {
|
||||
// Generate a label for LOH related instruction
|
||||
MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
|
||||
// Associate the instruction with the label
|
||||
LOHInstToLabel[MI] = LOHLabel;
|
||||
OutStreamer.EmitLabel(LOHLabel);
|
||||
}
|
||||
|
||||
// Do any manual lowerings.
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
case ARM64::DBG_VALUE: {
|
||||
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
|
||||
SmallString<128> TmpStr;
|
||||
raw_svector_ostream OS(TmpStr);
|
||||
PrintDebugValueComment(MI, OS);
|
||||
OutStreamer.EmitRawText(StringRef(OS.str()));
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Indexed loads and stores use a pseudo to handle complex operand
|
||||
// tricks and writeback to the base register. We strip off the writeback
|
||||
// operand and switch the opcode here. Post-indexed stores were handled by the
|
||||
// tablegen'erated pseudos above. (The complex operand <--> simple
|
||||
// operand isel is beyond tablegen's ability, so we do these manually).
|
||||
case ARM64::LDRHHpre_isel:
|
||||
case ARM64::LDRBBpre_isel:
|
||||
case ARM64::LDRXpre_isel:
|
||||
case ARM64::LDRWpre_isel:
|
||||
case ARM64::LDRDpre_isel:
|
||||
case ARM64::LDRSpre_isel:
|
||||
case ARM64::LDRSBWpre_isel:
|
||||
case ARM64::LDRSBXpre_isel:
|
||||
case ARM64::LDRSHWpre_isel:
|
||||
case ARM64::LDRSHXpre_isel:
|
||||
case ARM64::LDRSWpre_isel:
|
||||
case ARM64::LDRDpost_isel:
|
||||
case ARM64::LDRSpost_isel:
|
||||
case ARM64::LDRXpost_isel:
|
||||
case ARM64::LDRWpost_isel:
|
||||
case ARM64::LDRHHpost_isel:
|
||||
case ARM64::LDRBBpost_isel:
|
||||
case ARM64::LDRSWpost_isel:
|
||||
case ARM64::LDRSHWpost_isel:
|
||||
case ARM64::LDRSHXpost_isel:
|
||||
case ARM64::LDRSBWpost_isel:
|
||||
case ARM64::LDRSBXpost_isel: {
|
||||
MCInst TmpInst;
|
||||
// For loads, the writeback operand to be skipped is the second.
|
||||
TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
|
||||
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
|
||||
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
|
||||
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
|
||||
EmitToStreamer(OutStreamer, TmpInst);
|
||||
return;
|
||||
}
|
||||
case ARM64::STRXpre_isel:
|
||||
case ARM64::STRWpre_isel:
|
||||
case ARM64::STRHHpre_isel:
|
||||
case ARM64::STRBBpre_isel:
|
||||
case ARM64::STRDpre_isel:
|
||||
case ARM64::STRSpre_isel: {
|
||||
MCInst TmpInst;
|
||||
// For stores, the writeback operand to be skipped is the first.
|
||||
TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
|
||||
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
|
||||
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
|
||||
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
|
||||
EmitToStreamer(OutStreamer, TmpInst);
|
||||
return;
|
||||
}
|
||||
|
||||
// Tail calls use pseudo instructions so they have the proper code-gen
|
||||
// attributes (isCall, isReturn, etc.). We lower them to the real
|
||||
// instruction here.
|
||||
case ARM64::TCRETURNri: {
|
||||
MCInst TmpInst;
|
||||
TmpInst.setOpcode(ARM64::BR);
|
||||
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
|
||||
EmitToStreamer(OutStreamer, TmpInst);
|
||||
return;
|
||||
}
|
||||
case ARM64::TCRETURNdi: {
|
||||
MCOperand Dest;
|
||||
MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
|
||||
MCInst TmpInst;
|
||||
TmpInst.setOpcode(ARM64::B);
|
||||
TmpInst.addOperand(Dest);
|
||||
EmitToStreamer(OutStreamer, TmpInst);
|
||||
return;
|
||||
}
|
||||
case ARM64::TLSDESC_BLR: {
|
||||
MCOperand Callee, Sym;
|
||||
MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
|
||||
MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
|
||||
|
||||
// First emit a relocation-annotation. This expands to no code, but requests
|
||||
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
|
||||
MCInst TLSDescCall;
|
||||
TLSDescCall.setOpcode(ARM64::TLSDESCCALL);
|
||||
TLSDescCall.addOperand(Sym);
|
||||
EmitToStreamer(OutStreamer, TLSDescCall);
|
||||
|
||||
// Other than that it's just a normal indirect call to the function loaded
|
||||
// from the descriptor.
|
||||
MCInst BLR;
|
||||
BLR.setOpcode(ARM64::BLR);
|
||||
BLR.addOperand(Callee);
|
||||
EmitToStreamer(OutStreamer, BLR);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
case TargetOpcode::STACKMAP:
|
||||
return LowerSTACKMAP(OutStreamer, SM, *MI);
|
||||
|
||||
case TargetOpcode::PATCHPOINT:
|
||||
return LowerPATCHPOINT(OutStreamer, SM, *MI);
|
||||
}
|
||||
|
||||
// Finally, do the automated lowerings for everything else.
|
||||
MCInst TmpInst;
|
||||
MCInstLowering.Lower(MI, TmpInst);
|
||||
EmitToStreamer(OutStreamer, TmpInst);
|
||||
}
|
||||
|
||||
// Force static initialization.
|
||||
extern "C" void LLVMInitializeARM64AsmPrinter() {
|
||||
RegisterAsmPrinter<ARM64AsmPrinter> X(TheARM64Target);
|
||||
}
|
506
lib/Target/ARM64/ARM64BranchRelaxation.cpp
Normal file
@ -0,0 +1,506 @@
|
||||
//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
// This file implements a pass that relaxes conditional branches whose
// destinations are out of range, by inverting the condition and branching
// over an unconditional branch to the original destination.
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-branch-relax"
|
||||
#include "ARM64.h"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "ARM64MachineFunctionInfo.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool>
|
||||
BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true),
|
||||
cl::desc("Relax out of range conditional branches"));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14),
|
||||
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19),
|
||||
cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19),
|
||||
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
|
||||
|
||||
STATISTIC(NumSplit, "Number of basic blocks split");
|
||||
STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
|
||||
|
||||
namespace {
|
||||
class ARM64BranchRelaxation : public MachineFunctionPass {
|
||||
/// BasicBlockInfo - Information about the offset and size of a single
|
||||
/// basic block.
|
||||
struct BasicBlockInfo {
|
||||
/// Offset - Distance from the beginning of the function to the beginning
|
||||
/// of this basic block.
|
||||
///
|
||||
/// The offset is always aligned as required by the basic block.
|
||||
unsigned Offset;
|
||||
|
||||
/// Size - Size of the basic block in bytes. If the block contains
|
||||
/// inline assembly, this is a worst case estimate.
|
||||
///
|
||||
/// The size does not include any alignment padding whether from the
|
||||
/// beginning of the block, or from an aligned jump table at the end.
|
||||
unsigned Size;
|
||||
|
||||
BasicBlockInfo() : Offset(0), Size(0) {}
|
||||
|
||||
/// Compute the offset immediately following this block. If LogAlign is
|
||||
/// specified, return the offset the successor block will get if it has
|
||||
/// this alignment.
|
||||
unsigned postOffset(unsigned LogAlign = 0) const {
|
||||
unsigned PO = Offset + Size;
|
||||
unsigned Align = 1 << LogAlign;
|
||||
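// Round PO up to the next multiple of Align (Align is a power of two).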
return (PO + Align - 1) / Align * Align;
|
||||
}
|
||||
};
|
||||
|
||||
SmallVector<BasicBlockInfo, 16> BlockInfo;
|
||||
|
||||
MachineFunction *MF;
|
||||
const ARM64InstrInfo *TII;
|
||||
|
||||
bool relaxBranchInstructions();
|
||||
void scanFunction();
|
||||
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
|
||||
void adjustBlockOffsets(MachineBasicBlock *BB);
|
||||
bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
|
||||
bool fixupConditionalBranch(MachineInstr *MI);
|
||||
void computeBlockSize(MachineBasicBlock *MBB);
|
||||
unsigned getInstrOffset(MachineInstr *MI) const;
|
||||
void dumpBBs();
|
||||
void verify();
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
ARM64BranchRelaxation() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM64 branch relaxation pass";
|
||||
}
|
||||
};
|
||||
char ARM64BranchRelaxation::ID = 0;
|
||||
}
|
||||
|
||||
/// verify - check BBOffsets, BBSizes, alignment of islands
|
||||
void ARM64BranchRelaxation::verify() {
|
||||
#ifndef NDEBUG
|
||||
unsigned PrevNum = MF->begin()->getNumber();
|
||||
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
|
||||
++MBBI) {
|
||||
MachineBasicBlock *MBB = MBBI;
|
||||
unsigned Align = MBB->getAlignment();
|
||||
unsigned Num = MBB->getNumber();
|
||||
assert(BlockInfo[Num].Offset % (1u << Align) == 0);
|
||||
assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
|
||||
PrevNum = Num;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/// print block size and offset information - debugging
|
||||
void ARM64BranchRelaxation::dumpBBs() {
|
||||
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
|
||||
++MBBI) {
|
||||
const BasicBlockInfo &BBI = BlockInfo[MBBI->getNumber()];
|
||||
dbgs() << format("BB#%u\toffset=%08x\t", MBBI->getNumber(), BBI.Offset)
|
||||
<< format("size=%#x\n", BBI.Size);
|
||||
}
|
||||
}
|
||||
|
||||
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
|
||||
/// into the block immediately after it.
|
||||
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
|
||||
// Get the next machine basic block in the function.
|
||||
MachineFunction::iterator MBBI = MBB;
|
||||
// Can't fall off end of function.
|
||||
if (std::next(MBBI) == MBB->getParent()->end())
|
||||
return false;
|
||||
|
||||
MachineBasicBlock *NextBB = std::next(MBBI);
|
||||
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
|
||||
E = MBB->succ_end();
|
||||
I != E; ++I)
|
||||
if (*I == NextBB)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// scanFunction - Do the initial scan of the function, building up
|
||||
/// information about each block.
|
||||
void ARM64BranchRelaxation::scanFunction() {
|
||||
BlockInfo.clear();
|
||||
BlockInfo.resize(MF->getNumBlockIDs());
|
||||
|
||||
// First thing, compute the size of all basic blocks, and see if the function
|
||||
// has any inline assembly in it. If so, we have to be conservative about
|
||||
// alignment assumptions, as we don't know for sure the size of any
|
||||
// instructions in the inline assembly.
|
||||
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
|
||||
computeBlockSize(I);
|
||||
|
||||
// Compute block offsets and known bits.
|
||||
adjustBlockOffsets(MF->begin());
|
||||
}
|
||||
|
||||
/// computeBlockSize - Compute the size for MBB.
|
||||
/// This function updates BlockInfo directly.
|
||||
void ARM64BranchRelaxation::computeBlockSize(MachineBasicBlock *MBB) {
|
||||
unsigned Size = 0;
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
|
||||
++I)
|
||||
Size += TII->GetInstSizeInBytes(I);
|
||||
BlockInfo[MBB->getNumber()].Size = Size;
|
||||
}
|
||||
|
||||
/// getInstrOffset - Return the current offset of the specified machine
|
||||
/// instruction from the start of the function. This offset changes as stuff is
|
||||
/// moved around inside the function.
|
||||
unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
|
||||
// The offset is composed of two things: the sum of the sizes of all MBB's
|
||||
// before this instruction's block, and the offset from the start of the block
|
||||
// it is in.
|
||||
unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
|
||||
|
||||
// Sum instructions before MI in MBB.
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
|
||||
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
|
||||
Offset += TII->GetInstSizeInBytes(I);
|
||||
}
|
||||
return Offset;
|
||||
}
|
||||
|
||||
void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) {
|
||||
unsigned PrevNum = Start->getNumber();
|
||||
MachineFunction::iterator MBBI = Start, E = MF->end();
|
||||
for (++MBBI; MBBI != E; ++MBBI) {
|
||||
MachineBasicBlock *MBB = MBBI;
|
||||
unsigned Num = MBB->getNumber();
|
||||
if (!Num) // block zero is never changed from offset zero.
|
||||
continue;
|
||||
// Get the offset and known bits at the end of the layout predecessor.
|
||||
// Include the alignment of the current block.
|
||||
unsigned LogAlign = MBBI->getAlignment();
|
||||
BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
|
||||
PrevNum = Num;
|
||||
}
|
||||
}
|
||||
|
||||
/// Split the basic block containing MI into two blocks, which are joined by
|
||||
/// an unconditional branch. Update data structures and renumber blocks to
|
||||
/// account for this change and returns the newly created block.
|
||||
/// NOTE: Successor list of the original BB is out of date after this function,
|
||||
/// and must be updated by the caller! Other transforms that follow use this
/// utility function, so there is no point updating it here rather than waiting.
|
||||
MachineBasicBlock *
|
||||
ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
|
||||
MachineBasicBlock *OrigBB = MI->getParent();
|
||||
|
||||
// Create a new MBB for the code after the OrigBB.
|
||||
MachineBasicBlock *NewBB =
|
||||
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
|
||||
MachineFunction::iterator MBBI = OrigBB;
|
||||
++MBBI;
|
||||
MF->insert(MBBI, NewBB);
|
||||
|
||||
// Splice the instructions starting with MI over to NewBB.
|
||||
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
|
||||
|
||||
// Add an unconditional branch from OrigBB to NewBB.
|
||||
// Note the new unconditional branch is not being recorded.
|
||||
// There doesn't seem to be meaningful DebugInfo available; this doesn't
|
||||
// correspond to anything in the source.
|
||||
BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB);
|
||||
|
||||
// Insert an entry into BlockInfo to align it properly with the block numbers.
|
||||
BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
|
||||
|
||||
// Figure out how large the OrigBB is. As the first half of the original
|
||||
// block, it cannot contain a tablejump. The size includes
|
||||
// the new jump we added. (It should be possible to do this without
|
||||
// recounting everything, but it's very confusing, and this is rarely
|
||||
// executed.)
|
||||
computeBlockSize(OrigBB);
|
||||
|
||||
// Figure out how large the NewMBB is. As the second half of the original
|
||||
// block, it may contain a tablejump.
|
||||
computeBlockSize(NewBB);
|
||||
|
||||
// All BBOffsets following these blocks must be modified.
|
||||
adjustBlockOffsets(OrigBB);
|
||||
|
||||
++NumSplit;
|
||||
|
||||
return NewBB;
|
||||
}
|
||||
|
||||
/// isBlockInRange - Returns true if the distance between the specified MI and
/// the specified BB fits in MI's displacement field.
|
||||
bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI,
|
||||
MachineBasicBlock *DestBB,
|
||||
unsigned Bits) {
|
||||
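// The displacement is a signed field of 'Bits' bits counted in 4-byte
// instruction words, so the largest reachable forward offset in bytes is
// ((1 << (Bits - 1)) - 1) << 2.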
unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
|
||||
unsigned BrOffset = getInstrOffset(MI);
|
||||
unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
|
||||
|
||||
DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
|
||||
<< " from BB#" << MI->getParent()->getNumber()
|
||||
<< " max delta=" << MaxOffs << " from " << getInstrOffset(MI)
|
||||
<< " to " << DestOffset << " offset "
|
||||
<< int(DestOffset - BrOffset) << "\t" << *MI);
|
||||
|
||||
// Branch before the Dest.
|
||||
if (BrOffset <= DestOffset)
|
||||
return (DestOffset - BrOffset <= MaxOffs);
|
||||
return (BrOffset - DestOffset <= MaxOffs);
|
||||
}
|
||||
|
||||
static bool isConditionalBranch(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
return false;
|
||||
case ARM64::TBZ:
|
||||
case ARM64::TBNZ:
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZX:
|
||||
case ARM64::Bcc:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
assert(0 && "unexpected opcode!");
|
||||
case ARM64::TBZ:
|
||||
case ARM64::TBNZ:
|
||||
return MI->getOperand(2).getMBB();
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZX:
|
||||
case ARM64::Bcc:
|
||||
return MI->getOperand(1).getMBB();
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getOppositeConditionOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
assert(0 && "unexpected opcode!");
|
||||
case ARM64::TBNZ: return ARM64::TBZ;
|
||||
case ARM64::TBZ: return ARM64::TBNZ;
|
||||
case ARM64::CBNZW: return ARM64::CBZW;
|
||||
case ARM64::CBNZX: return ARM64::CBZX;
|
||||
case ARM64::CBZW: return ARM64::CBNZW;
|
||||
case ARM64::CBZX: return ARM64::CBNZX;
|
||||
case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc.
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getBranchDisplacementBits(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
assert(0 && "unexpected opcode!");
|
||||
case ARM64::TBNZ:
|
||||
case ARM64::TBZ:
|
||||
return TBZDisplacementBits;
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBNZX:
|
||||
case ARM64::CBZX:
|
||||
return CBZDisplacementBits;
|
||||
case ARM64::Bcc:
|
||||
return BCCDisplacementBits;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void invertBccCondition(MachineInstr *MI) {
|
||||
assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!");
|
||||
ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm();
|
||||
CC = ARM64CC::getInvertedCondCode(CC);
|
||||
MI->getOperand(0).setImm((int64_t)CC);
|
||||
}
|
||||
|
||||
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
|
||||
/// too far away to fit in its displacement field. It is converted to an inverse
|
||||
/// conditional branch + an unconditional branch to the destination.
|
||||
bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
|
||||
MachineBasicBlock *DestBB = getDestBlock(MI);
|
||||
|
||||
// Add an unconditional branch to the destination and invert the branch
|
||||
// condition to jump over it:
|
||||
// tbz L1
|
||||
// =>
|
||||
// tbnz L2
|
||||
// b L1
|
||||
// L2:
|
||||
|
||||
// If the branch is at the end of its MBB and that has a fall-through block,
|
||||
// direct the updated conditional branch to the fall-through block. Otherwise,
|
||||
// split the MBB before the next instruction.
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineInstr *BMI = &MBB->back();
|
||||
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
|
||||
|
||||
if (BMI != MI) {
|
||||
if (std::next(MachineBasicBlock::iterator(MI)) ==
|
||||
std::prev(MBB->getLastNonDebugInstr()) &&
|
||||
BMI->getOpcode() == ARM64::B) {
|
||||
// Last MI in the BB is an unconditional branch. Can we simply invert the
|
||||
// condition and swap destinations:
|
||||
// beq L1
|
||||
// b L2
|
||||
// =>
|
||||
// bne L2
|
||||
// b L1
|
||||
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
|
||||
if (isBlockInRange(MI, NewDest,
|
||||
getBranchDisplacementBits(MI->getOpcode()))) {
|
||||
DEBUG(dbgs() << " Invert condition and swap its destination with "
|
||||
<< *BMI);
|
||||
BMI->getOperand(0).setMBB(DestBB);
|
||||
unsigned OpNum =
|
||||
(MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
|
||||
? 2
|
||||
: 1;
|
||||
MI->getOperand(OpNum).setMBB(NewDest);
|
||||
MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode())));
|
||||
if (MI->getOpcode() == ARM64::Bcc)
|
||||
invertBccCondition(MI);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (NeedSplit) {
|
||||
// Analyze the branch so we know how to update the successor lists.
|
||||
MachineBasicBlock *TBB, *FBB;
|
||||
SmallVector<MachineOperand, 2> Cond;
|
||||
TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false);
|
||||
|
||||
MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
|
||||
// No need for the branch to the next block. We're adding an unconditional
|
||||
// branch to the destination.
|
||||
int delta = TII->GetInstSizeInBytes(&MBB->back());
|
||||
BlockInfo[MBB->getNumber()].Size -= delta;
|
||||
MBB->back().eraseFromParent();
|
||||
// BlockInfo[SplitBB].Offset is wrong temporarily, fixed below
|
||||
|
||||
// Update the successor lists according to the transformation to follow.
|
||||
// Do it here since if there's no split, no update is needed.
|
||||
MBB->replaceSuccessor(FBB, NewBB);
|
||||
NewBB->addSuccessor(FBB);
|
||||
}
|
||||
MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
|
||||
|
||||
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
|
||||
<< ", invert condition and change dest. to BB#"
|
||||
<< NextBB->getNumber() << "\n");
|
||||
|
||||
// Insert a new conditional branch and a new unconditional branch.
|
||||
MachineInstrBuilder MIB = BuildMI(
|
||||
MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode())))
|
||||
.addOperand(MI->getOperand(0));
|
||||
if (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
|
||||
MIB.addOperand(MI->getOperand(1));
|
||||
if (MI->getOpcode() == ARM64::Bcc)
|
||||
invertBccCondition(MIB);
|
||||
MIB.addMBB(NextBB);
|
||||
BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
|
||||
BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB);
|
||||
BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
|
||||
|
||||
// Remove the old conditional branch. It may or may not still be in MBB.
|
||||
BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
|
||||
MI->eraseFromParent();
|
||||
|
||||
// Finally, keep the block offsets up to date.
|
||||
adjustBlockOffsets(MBB);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64BranchRelaxation::relaxBranchInstructions() {
|
||||
bool Changed = false;
|
||||
// Relaxing branches involves creating new basic blocks, so re-evaluate
// end() on each iteration for termination.
|
||||
for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
|
||||
MachineInstr *MI = I->getFirstTerminator();
|
||||
if (isConditionalBranch(MI->getOpcode()) &&
|
||||
!isBlockInRange(MI, getDestBlock(MI),
|
||||
getBranchDisplacementBits(MI->getOpcode()))) {
|
||||
fixupConditionalBranch(MI);
|
||||
++NumRelaxed;
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
|
||||
MF = &mf;
|
||||
|
||||
// If the pass is disabled, just bail early.
|
||||
if (!BranchRelaxation)
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n");
|
||||
|
||||
TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo();
|
||||
|
||||
// Renumber all of the machine basic blocks in the function, guaranteeing that
|
||||
// the numbers agree with the position of the block in the function.
|
||||
MF->RenumberBlocks();
|
||||
|
||||
// Do the initial scan of the function, building up information about the
|
||||
// sizes of each block.
|
||||
scanFunction();
|
||||
|
||||
DEBUG(dbgs() << " Basic blocks before relaxation\n");
|
||||
DEBUG(dumpBBs());
|
||||
|
||||
bool MadeChange = false;
|
||||
while (relaxBranchInstructions())
|
||||
MadeChange = true;
|
||||
|
||||
// After a while, this might be made debug-only, but it is not expensive.
|
||||
verify();
|
||||
|
||||
DEBUG(dbgs() << " Basic blocks after relaxation\n");
|
||||
DEBUG(dbgs() << '\n'; dumpBBs());
|
||||
|
||||
BlockInfo.clear();
|
||||
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
/// createARM64BranchRelaxation - Returns an instance of the ARM64 branch
/// relaxation pass.
|
||||
FunctionPass *llvm::createARM64BranchRelaxation() {
|
||||
return new ARM64BranchRelaxation();
|
||||
}
|
94
lib/Target/ARM64/ARM64CallingConv.h
Normal file
@ -0,0 +1,94 @@
|
||||
//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the custom routines for the ARM64 Calling Convention that
|
||||
// aren't done by tablegen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64CALLINGCONV_H
|
||||
#define ARM64CALLINGCONV_H
|
||||
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via
|
||||
/// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the
|
||||
/// argument is already promoted and LocVT is i1/i8/i16. We only promote the
|
||||
/// argument to i32 if we are sure this argument will be passed in a register.
|
||||
static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
CCValAssign::LocInfo LocInfo,
|
||||
ISD::ArgFlagsTy ArgFlags,
|
||||
CCState &State,
|
||||
bool IsWebKitJS = false) {
|
||||
static const uint16_t RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2,
|
||||
ARM64::W3, ARM64::W4, ARM64::W5,
|
||||
ARM64::W6, ARM64::W7 };
|
||||
static const uint16_t RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2,
|
||||
ARM64::X3, ARM64::X4, ARM64::X5,
|
||||
ARM64::X6, ARM64::X7 };
|
||||
static const uint16_t WebKitRegList1[] = { ARM64::W0 };
|
||||
static const uint16_t WebKitRegList2[] = { ARM64::X0 };
|
||||
|
||||
const uint16_t *List1 = IsWebKitJS ? WebKitRegList1 : RegList1;
|
||||
const uint16_t *List2 = IsWebKitJS ? WebKitRegList2 : RegList2;
|
||||
|
||||
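// Try to allocate the next free 32-bit register from List1; the matching
// 64-bit register in List2 is shadowed (marked as used) at the same time.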
if (unsigned Reg = State.AllocateReg(List1, List2, 8)) {
|
||||
// Customized extra section for handling i1/i8/i16:
|
||||
// We need to promote the argument to i32 if it is not done already.
|
||||
if (ValVT != MVT::i32) {
|
||||
if (ArgFlags.isSExt())
|
||||
LocInfo = CCValAssign::SExt;
|
||||
else if (ArgFlags.isZExt())
|
||||
LocInfo = CCValAssign::ZExt;
|
||||
else
|
||||
LocInfo = CCValAssign::AExt;
|
||||
ValVT = MVT::i32;
|
||||
}
|
||||
// Set LocVT to i32 as well if passing via register.
|
||||
LocVT = MVT::i32;
|
||||
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16
|
||||
/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but only
|
||||
/// uses the first register.
|
||||
static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
CCValAssign::LocInfo LocInfo,
|
||||
ISD::ArgFlagsTy ArgFlags,
|
||||
CCState &State) {
|
||||
return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
|
||||
State, true);
|
||||
}
|
||||
|
||||
/// CC_ARM64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on
|
||||
/// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument
|
||||
/// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted,
|
||||
/// it will be truncated back to i1/i8/i16.
|
||||
static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
CCValAssign::LocInfo LocInfo,
|
||||
ISD::ArgFlagsTy ArgFlags,
|
||||
CCState &State) {
|
||||
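// i1 and i8 occupy a single byte on the stack, i16 occupies two; the slot is
// aligned to its own size.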
unsigned Space = ((LocVT == MVT::i1 || LocVT == MVT::i8) ? 1 : 2);
|
||||
unsigned Offset12 = State.AllocateStack(Space, Space);
|
||||
ValVT = LocVT;
|
||||
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset12, LocVT, LocInfo));
|
||||
return true;
|
||||
}
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
210
lib/Target/ARM64/ARM64CallingConvention.td
Normal file
@ -0,0 +1,210 @@
|
||||
//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This describes the calling conventions for ARM64 architecture.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// CCIfAlign - Match if the original alignment of the argument equals Align.
|
||||
class CCIfAlign<string Align, CCAction A> :
|
||||
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM AAPCS64 Calling Convention
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CC_ARM64_AAPCS : CallingConv<[
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
// An i128 is split into two i64s; we can't fit half of it into register X7.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
|
||||
[X0, X1, X3, X5]>>>,
|
||||
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
def RetCC_ARM64_AAPCS : CallingConv<[
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
]>;
|
||||
|
||||
|
||||
// Darwin uses a calling convention which differs in only two ways
|
||||
// from the standard one at this level:
|
||||
// + i128s (i.e. split i64s) don't need even registers.
|
||||
// + Stack slots are sized as needed rather than being at least 64-bit.
|
||||
def CC_ARM64_DarwinPCS : CallingConv<[
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// An SRet is passed in X8, not X0 like a normal pointer parameter.
|
||||
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
|
||||
|
||||
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
|
||||
// up to eight each of GPR and FPR.
|
||||
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
// An i128 is split into two i64s; we can't fit half of it into register X7.
|
||||
CCIfType<[i64],
|
||||
CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
|
||||
[W0, W1, W2, W3, W4, W5, W6]>>>,
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
|
||||
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
|
||||
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
|
||||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
|
||||
// If more than will fit in registers, pass them on the stack instead.
|
||||
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
|
||||
CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
def CC_ARM64_DarwinPCS_VarArg : CallingConv<[
|
||||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
|
||||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
|
||||
|
||||
// Handle all scalar types as either i64 or f64.
|
||||
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
|
||||
CCIfType<[f32], CCPromoteToType<f64>>,
|
||||
|
||||
// Everything is on the stack.
|
||||
// i128 is split to two i64s, and its stack alignment is 16 bytes.
|
||||
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
|
||||
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
|
||||
// The WebKit_JS calling convention only passes the first argument (the callee)
// in a register; the remaining arguments go on the stack. We allow 32-bit stack
// slots so that WebKit can write partial values into the stack and define the
// other 32-bit quantity as undef.
|
||||
def CC_ARM64_WebKit_JS : CallingConv<[
|
||||
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
|
||||
CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>,
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
|
||||
|
||||
// Pass the remaining arguments on the stack instead.
|
||||
CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
|
||||
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
|
||||
]>;
|
||||
|
||||
def RetCC_ARM64_WebKit_JS : CallingConv<[
|
||||
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
|
||||
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
|
||||
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
|
||||
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
|
||||
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
|
||||
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
|
||||
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
]>;
|
||||
|
||||
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
|
||||
// presumably a callee to someone. External functions may not do so, but this
|
||||
// is currently safe since BL has LR as an implicit-def and what happens after a
|
||||
// tail call doesn't matter.
|
||||
//
|
||||
// It would be better to model its preservation semantics properly (create a
|
||||
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
|
||||
// end up saving LR as part of a call frame). Watch this space...
|
||||
def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
X23, X24, X25, X26, X27, X28,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
|
||||
// 'this' and the pointer return value are both passed in X0 in these cases,
|
||||
// this can be partially modelled by treating X0 as a callee-saved register;
|
||||
// only the resulting RegMask is used; the SaveList is ignored
|
||||
//
|
||||
// (For generic ARM 64-bit ABI code, clang will not generate constructors or
|
||||
// destructors with 'this' returns, so this RegMask will not be used in that
|
||||
// case)
|
||||
def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>;
|
||||
|
||||
// The function used by Darwin to obtain the address of a thread-local variable
|
||||
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
|
||||
// fast path for calculation, but other registers except X0 (argument/return)
|
||||
// and LR (it is a call, after all) are preserved.
|
||||
def CSR_ARM64_TLS_Darwin
|
||||
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
|
||||
FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
// The ELF stub used for TLS-descriptor access saves every feasible
|
||||
// register. Only X0 and LR are clobbered.
|
||||
def CSR_ARM64_TLS_ELF
|
||||
: CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP,
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
||||
def CSR_ARM64_AllRegs
|
||||
: CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP,
|
||||
(sequence "X%u", 0, 28), FP, LR, SP,
|
||||
(sequence "B%u", 0, 31), (sequence "H%u", 0, 31),
|
||||
(sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
|
||||
(sequence "Q%u", 0, 31))>;
|
||||
|
148
lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp
Normal file
@ -0,0 +1,148 @@
|
||||
//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Local-dynamic access to thread-local variables proceeds in three stages.
|
||||
//
|
||||
// 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated
|
||||
// in much the same way as a general-dynamic TLS-descriptor access against
|
||||
// the special symbol _TLS_MODULE_BASE_.
|
||||
// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using
|
||||
// instructions with "dtprel" modifiers.
|
||||
// 3. These two are added, together with TPIDR_EL0, to obtain the variable's
|
||||
// true address.
|
||||
//
|
||||
// This is only better than general-dynamic access to the variable if two or
|
||||
// more of the first stage TLS-descriptor calculations can be combined. This
|
||||
// pass looks through a function and performs such combinations.
|
||||
//
|
||||
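//
// Illustrative sketch only (exact registers and relocations are chosen by the
// code generator): before this pass, each access starts with its own
// TLS-descriptor sequence against _TLS_MODULE_BASE_ ending in a BLR that
// leaves the module's TLS base in X0. After this pass, the first access copies
// X0 into a virtual register and later accesses copy it back instead of
// repeating the descriptor call.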
//===----------------------------------------------------------------------===//
|
||||
#include "ARM64.h"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "ARM64MachineFunctionInfo.h"
|
||||
#include "ARM64TargetMachine.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
struct LDTLSCleanup : public MachineFunctionPass {
|
||||
static char ID;
|
||||
LDTLSCleanup() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF) {
|
||||
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
|
||||
if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
|
||||
// No point folding accesses if there aren't at least two.
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
|
||||
return VisitNode(DT->getRootNode(), 0);
|
||||
}
|
||||
|
||||
// Visit the dominator subtree rooted at Node in pre-order.
|
||||
// If TLSBaseAddrReg is non-null, then use that to replace any
|
||||
// TLS_base_addr instructions. Otherwise, create the register
|
||||
// when the first such instruction is seen, and then use it
|
||||
// as we encounter more instructions.
|
||||
bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
|
||||
MachineBasicBlock *BB = Node->getBlock();
|
||||
bool Changed = false;
|
||||
|
||||
// Traverse the current block.
|
||||
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
|
||||
++I) {
|
||||
switch (I->getOpcode()) {
|
||||
case ARM64::TLSDESC_BLR:
|
||||
// Make sure it's a local dynamic access.
|
||||
if (!I->getOperand(1).isSymbol() ||
|
||||
strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
|
||||
break;
|
||||
|
||||
if (TLSBaseAddrReg)
|
||||
I = replaceTLSBaseAddrCall(I, TLSBaseAddrReg);
|
||||
else
|
||||
I = setRegister(I, &TLSBaseAddrReg);
|
||||
Changed = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Visit the children of this block in the dominator tree.
|
||||
for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
|
||||
I != E; ++I) {
|
||||
Changed |= VisitNode(*I, TLSBaseAddrReg);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Replace the TLS_base_addr instruction I with a copy from
|
||||
// TLSBaseAddrReg, returning the new instruction.
|
||||
MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
|
||||
unsigned TLSBaseAddrReg) {
|
||||
MachineFunction *MF = I->getParent()->getParent();
|
||||
const ARM64TargetMachine *TM =
|
||||
static_cast<const ARM64TargetMachine *>(&MF->getTarget());
|
||||
const ARM64InstrInfo *TII = TM->getInstrInfo();
|
||||
|
||||
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
|
||||
// code sequence assumes the address will be.
|
||||
MachineInstr *Copy =
|
||||
BuildMI(*I->getParent(), I, I->getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg);
|
||||
|
||||
// Erase the TLS_base_addr instruction.
|
||||
I->eraseFromParent();
|
||||
|
||||
return Copy;
|
||||
}
|
||||
|
||||
// Create a virtual register in *TLSBaseAddrReg, and populate it by
|
||||
// inserting a copy instruction after I. Returns the new instruction.
|
||||
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
|
||||
MachineFunction *MF = I->getParent()->getParent();
|
||||
const ARM64TargetMachine *TM =
|
||||
static_cast<const ARM64TargetMachine *>(&MF->getTarget());
|
||||
const ARM64InstrInfo *TII = TM->getInstrInfo();
|
||||
|
||||
// Create a virtual register for the TLS base address.
|
||||
MachineRegisterInfo &RegInfo = MF->getRegInfo();
|
||||
*TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
|
||||
|
||||
// Insert a copy from X0 to TLSBaseAddrReg for later.
|
||||
MachineInstr *Next = I->getNextNode();
|
||||
MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY),
|
||||
*TLSBaseAddrReg).addReg(ARM64::X0);
|
||||
|
||||
return Copy;
|
||||
}
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "Local Dynamic TLS Access Clean-up";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
char LDTLSCleanup::ID = 0;
|
||||
FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() {
|
||||
return new LDTLSCleanup();
|
||||
}
|
1122
lib/Target/ARM64/ARM64CollectLOH.cpp
Normal file
File diff suppressed because it is too large
918
lib/Target/ARM64/ARM64ConditionalCompares.cpp
Normal file
@ -0,0 +1,918 @@
|
||||
//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM64ConditionalCompares pass which reduces
|
||||
// branching and code size by using the conditional compare instructions CCMP,
|
||||
// CCMN, and FCMP.
|
||||
//
|
||||
// The CFG transformations for forming conditional compares are very similar to
|
||||
// if-conversion, and this pass should run immediately before the early
|
||||
// if-conversion pass.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-ccmp"
|
||||
#include "ARM64.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/DepthFirstIterator.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SparseSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineTraceMetrics.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Absolute maximum number of instructions allowed per speculated block.
|
||||
// This bypasses all other heuristics, so it should be set fairly high.
|
||||
static cl::opt<unsigned> BlockInstrLimit(
|
||||
"arm64-ccmp-limit", cl::init(30), cl::Hidden,
|
||||
cl::desc("Maximum number of instructions per speculated block."));
|
||||
|
||||
// Stress testing mode - disable heuristics.
|
||||
static cl::opt<bool> Stress("arm64-stress-ccmp", cl::Hidden,
|
||||
cl::desc("Turn all knobs to 11"));
|
||||
|
||||
STATISTIC(NumConsidered, "Number of ccmps considered");
|
||||
STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
|
||||
STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
|
||||
STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
|
||||
STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
|
||||
STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
|
||||
STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
|
||||
STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
|
||||
STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
|
||||
STATISTIC(NumMultCPSRUses, "Number of ccmps rejected (CPSR used)");
|
||||
STATISTIC(NumUnknCPSRDefs, "Number of ccmps rejected (CPSR def unknown)");
|
||||
|
||||
STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
|
||||
|
||||
STATISTIC(NumConverted, "Number of ccmp instructions created");
|
||||
STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSACCmpConv
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
|
||||
// after determining if it is possible. The class contains no heuristics;
|
||||
// external code should be used to determine when ccmp-conversion is a good
|
||||
// idea.
|
||||
//
|
||||
// CCmp-formation works on a CFG representing chained conditions, typically
|
||||
// from C's short-circuit || and && operators:
|
||||
//
|
||||
// From: Head To: Head
|
||||
// / | CmpBB
|
||||
// / | / |
|
||||
// | CmpBB / |
|
||||
// | / | Tail |
|
||||
// | / | | |
|
||||
// Tail | | |
|
||||
// | | | |
|
||||
// ... ... ... ...
|
||||
//
|
||||
// The Head block is terminated by a br.cond instruction, and the CmpBB block
|
||||
// contains compare + br.cond. Tail must be a successor of both.
|
||||
//
|
||||
// The cmp-conversion turns the compare instruction in CmpBB into a conditional
|
||||
// compare, and merges CmpBB into Head, speculatively executing its
|
||||
// instructions. The ARM64 conditional compare instructions have an immediate
|
||||
// operand that specifies the NZCV flag values when the condition is false and
|
||||
// the compare isn't executed. This makes it possible to chain compares with
|
||||
// different condition codes.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// if (a == 5 || b == 17)
|
||||
// foo();
|
||||
//
|
||||
// Head:
|
||||
// cmp w0, #5
|
||||
// b.eq Tail
|
||||
// CmpBB:
|
||||
// cmp w1, #17
|
||||
// b.eq Tail
|
||||
// ...
|
||||
// Tail:
|
||||
// bl _foo
|
||||
//
|
||||
// Becomes:
|
||||
//
|
||||
// Head:
|
||||
// cmp w0, #5
|
||||
// ccmp w1, #17, 4, ne ; 4 = nZcv
|
||||
// b.eq Tail
|
||||
// ...
|
||||
// Tail:
|
||||
// bl _foo
|
||||
//
|
||||
// The ccmp condition code is the one that would cause the Head terminator to
|
||||
// branch to CmpBB.
|
||||
//
|
||||
// FIXME: It should also be possible to speculate a block on the critical edge
|
||||
// between Head and Tail, just like if-converting a diamond.
|
||||
//
|
||||
// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
|
||||
|
||||
namespace {
|
||||
class SSACCmpConv {
|
||||
MachineFunction *MF;
|
||||
const TargetInstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
MachineRegisterInfo *MRI;
|
||||
|
||||
public:
|
||||
/// The first block containing a conditional branch, dominating everything
|
||||
/// else.
|
||||
MachineBasicBlock *Head;
|
||||
|
||||
/// The block containing cmp+br.cond with a successor shared with Head.
|
||||
MachineBasicBlock *CmpBB;
|
||||
|
||||
/// The common successor for Head and CmpBB.
|
||||
MachineBasicBlock *Tail;
|
||||
|
||||
/// The compare instruction in CmpBB that can be converted to a ccmp.
|
||||
MachineInstr *CmpMI;
|
||||
|
||||
private:
|
||||
/// The branch condition in Head as determined by AnalyzeBranch.
|
||||
SmallVector<MachineOperand, 4> HeadCond;
|
||||
|
||||
/// The condition code that makes Head branch to CmpBB.
|
||||
ARM64CC::CondCode HeadCmpBBCC;
|
||||
|
||||
/// The branch condition in CmpBB.
|
||||
SmallVector<MachineOperand, 4> CmpBBCond;
|
||||
|
||||
/// The condition code that makes CmpBB branch to Tail.
|
||||
ARM64CC::CondCode CmpBBTailCC;
|
||||
|
||||
/// Check if the Tail PHIs are trivially convertible.
|
||||
bool trivialTailPHIs();
|
||||
|
||||
/// Remove CmpBB from the Tail PHIs.
|
||||
void updateTailPHIs();
|
||||
|
||||
/// Check if an operand defining DstReg is dead.
|
||||
bool isDeadDef(unsigned DstReg);
|
||||
|
||||
/// Find the compare instruction in MBB that controls the conditional branch.
|
||||
/// Return NULL if a convertible instruction can't be found.
|
||||
MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
|
||||
|
||||
/// Return true if all non-terminator instructions in MBB can be safely
|
||||
/// speculated.
|
||||
bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
|
||||
|
||||
public:
|
||||
/// runOnMachineFunction - Initialize per-function data structures.
|
||||
void runOnMachineFunction(MachineFunction &MF) {
|
||||
this->MF = &MF;
|
||||
TII = MF.getTarget().getInstrInfo();
|
||||
TRI = MF.getTarget().getRegisterInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
}
|
||||
|
||||
/// If the sub-CFG headed by MBB can be cmp-converted, initialize the
|
||||
/// internal state, and return true.
|
||||
bool canConvert(MachineBasicBlock *MBB);
|
||||
|
||||
/// Cmp-convert the last block passed to canConvert(), assuming
|
||||
/// it is possible. Add any erased blocks to RemovedBlocks.
|
||||
void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
|
||||
|
||||
/// Return the expected code size delta if the conversion into a
|
||||
/// conditional compare is performed.
|
||||
int expectedCodeSizeDelta() const;
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
|
||||
// This means that no if-conversion is required when merging CmpBB into Head.
|
||||
bool SSACCmpConv::trivialTailPHIs() {
|
||||
for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
|
||||
I != E && I->isPHI(); ++I) {
|
||||
unsigned HeadReg = 0, CmpBBReg = 0;
|
||||
// PHI operands come in (VReg, MBB) pairs.
|
||||
for (unsigned oi = 1, oe = I->getNumOperands(); oi != oe; oi += 2) {
|
||||
MachineBasicBlock *MBB = I->getOperand(oi + 1).getMBB();
|
||||
unsigned Reg = I->getOperand(oi).getReg();
|
||||
if (MBB == Head) {
|
||||
assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
|
||||
HeadReg = Reg;
|
||||
}
|
||||
if (MBB == CmpBB) {
|
||||
assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
|
||||
CmpBBReg = Reg;
|
||||
}
|
||||
}
|
||||
if (HeadReg != CmpBBReg)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
|
||||
// removing the CmpBB operands. The Head operands will be identical.
|
||||
void SSACCmpConv::updateTailPHIs() {
|
||||
for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
|
||||
I != E && I->isPHI(); ++I) {
|
||||
// I is a PHI. It can have multiple entries for CmpBB.
|
||||
for (unsigned oi = I->getNumOperands(); oi > 2; oi -= 2) {
|
||||
// PHI operands are (Reg, MBB) at (oi-2, oi-1).
|
||||
if (I->getOperand(oi - 1).getMBB() == CmpBB) {
|
||||
I->RemoveOperand(oi - 1);
|
||||
I->RemoveOperand(oi - 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are
|
||||
// still writing virtual registers without any uses.
|
||||
bool SSACCmpConv::isDeadDef(unsigned DstReg) {
|
||||
// Writes to the zero register are dead.
|
||||
if (DstReg == ARM64::WZR || DstReg == ARM64::XZR)
|
||||
return true;
|
||||
if (!TargetRegisterInfo::isVirtualRegister(DstReg))
|
||||
return false;
|
||||
// A virtual register def without any uses will be marked dead later, and
|
||||
// eventually replaced by the zero register.
|
||||
return MRI->use_nodbg_empty(DstReg);
|
||||
}
|
||||
|
||||
// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
|
||||
// corresponding to TBB.
|
||||
// Return true if the condition code could be determined and CC was set.
|
||||
bool parseCond(ArrayRef<MachineOperand> Cond, ARM64CC::CondCode &CC) {
|
||||
// A normal br.cond simply has the condition code.
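// AnalyzeBranch uses -1 as a sentinel for cbz/cbnz/tbz-style branches; the
// branch opcode then follows in Cond[1] (and, for the cbz/cbnz forms
// handled below, the tested register in Cond[2]).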
|
||||
if (Cond[0].getImm() != -1) {
|
||||
assert(Cond.size() == 1 && "Unknown Cond array format");
|
||||
CC = (ARM64CC::CondCode)(int)Cond[0].getImm();
|
||||
return true;
|
||||
}
|
||||
// For tbz and cbz instructions, the opcode is next.
|
||||
switch (Cond[1].getImm()) {
|
||||
default:
|
||||
// This includes tbz / tbnz branches which can't be converted to
|
||||
// ccmp + br.cond.
|
||||
return false;
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBZX:
|
||||
assert(Cond.size() == 3 && "Unknown Cond array format");
|
||||
CC = ARM64CC::EQ;
|
||||
return true;
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBNZX:
|
||||
assert(Cond.size() == 3 && "Unknown Cond array format");
|
||||
CC = ARM64CC::NE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
|
||||
MachineBasicBlock::iterator I = MBB->getFirstTerminator();
|
||||
if (I == MBB->end())
|
||||
return 0;
|
||||
// The terminator must be controlled by the flags.
|
||||
if (!I->readsRegister(ARM64::CPSR)) {
|
||||
switch (I->getOpcode()) {
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBNZX:
|
||||
// These can be converted into a ccmp against #0.
|
||||
return I;
|
||||
}
|
||||
++NumCmpTermRejs;
|
||||
DEBUG(dbgs() << "Flags not used by terminator: " << *I);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Now find the instruction controlling the terminator.
|
||||
for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
|
||||
--I;
|
||||
assert(!I->isTerminator() && "Spurious terminator");
|
||||
switch (I->getOpcode()) {
|
||||
// cmp is an alias for subs with a dead destination register.
|
||||
case ARM64::SUBSWri:
|
||||
case ARM64::SUBSXri:
|
||||
// cmn is an alias for adds with a dead destination register.
|
||||
case ARM64::ADDSWri:
|
||||
case ARM64::ADDSXri:
|
||||
// Check that the immediate operand is within range, ccmp wants a uimm5.
|
||||
// Rd = SUBSri Rn, imm, shift
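// A shifted immediate (non-zero operand 3) is also rejected because ccmp
// has no shifted-immediate form.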
|
||||
if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
|
||||
DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
|
||||
++NumImmRangeRejs;
|
||||
return 0;
|
||||
}
|
||||
// Fall through.
|
||||
case ARM64::SUBSWrr:
|
||||
case ARM64::SUBSXrr:
|
||||
case ARM64::ADDSWrr:
|
||||
case ARM64::ADDSXrr:
|
||||
if (isDeadDef(I->getOperand(0).getReg()))
|
||||
return I;
|
||||
DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
|
||||
++NumLiveDstRejs;
|
||||
return 0;
|
||||
case ARM64::FCMPSrr:
|
||||
case ARM64::FCMPDrr:
|
||||
case ARM64::FCMPESrr:
|
||||
case ARM64::FCMPEDrr:
|
||||
return I;
|
||||
}
|
||||
|
||||
// Check for flag reads and clobbers.
|
||||
MIOperands::PhysRegInfo PRI =
|
||||
MIOperands(I).analyzePhysReg(ARM64::CPSR, TRI);
|
||||
|
||||
if (PRI.Reads) {
|
||||
// The ccmp doesn't produce exactly the same flags as the original
|
||||
// compare, so reject the transform if there are uses of the flags
|
||||
// besides the terminators.
|
||||
DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
|
||||
++NumMultCPSRUses;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (PRI.Clobbers) {
|
||||
DEBUG(dbgs() << "Not convertible compare: " << *I);
|
||||
++NumUnknCPSRDefs;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Determine if all the instructions in MBB can safely
|
||||
/// be speculated. The terminators are not considered.
|
||||
///
|
||||
/// Only CmpMI is allowed to clobber the flags.
|
||||
///
|
||||
bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
|
||||
const MachineInstr *CmpMI) {
|
||||
// Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
|
||||
// get right.
|
||||
if (!MBB->livein_empty()) {
|
||||
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned InstrCount = 0;
|
||||
|
||||
// Check all instructions, except the terminators. It is assumed that
|
||||
// terminators never have side effects or define any used register values.
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(),
|
||||
E = MBB->getFirstTerminator();
|
||||
I != E; ++I) {
|
||||
if (I->isDebugValue())
|
||||
continue;
|
||||
|
||||
if (++InstrCount > BlockInstrLimit && !Stress) {
|
||||
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
|
||||
<< BlockInstrLimit << " instructions.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// There shouldn't normally be any phis in a single-predecessor block.
|
||||
if (I->isPHI()) {
|
||||
DEBUG(dbgs() << "Can't hoist: " << *I);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Don't speculate loads. Note that it may be possible and desirable to
|
||||
// speculate GOT or constant pool loads that are guaranteed not to trap,
|
||||
// but we don't support that for now.
|
||||
if (I->mayLoad()) {
|
||||
DEBUG(dbgs() << "Won't speculate load: " << *I);
|
||||
return false;
|
||||
}
|
||||
|
||||
// We never speculate stores, so an AA pointer isn't necessary.
|
||||
bool DontMoveAcrossStore = true;
|
||||
if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
|
||||
DEBUG(dbgs() << "Can't speculate: " << *I);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only CmpMI is allowed to clobber the flags.
|
||||
if (&*I != CmpMI && I->modifiesRegister(ARM64::CPSR, TRI)) {
|
||||
DEBUG(dbgs() << "Clobbers flags: " << *I);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
|
||||
/// candidate for cmp-conversion. Fill out the internal state.
|
||||
///
|
||||
bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
|
||||
Head = MBB;
|
||||
Tail = CmpBB = 0;
|
||||
|
||||
if (Head->succ_size() != 2)
|
||||
return false;
|
||||
MachineBasicBlock *Succ0 = Head->succ_begin()[0];
|
||||
MachineBasicBlock *Succ1 = Head->succ_begin()[1];
|
||||
|
||||
// CmpBB can only have a single predecessor. Tail is allowed many.
|
||||
if (Succ0->pred_size() != 1)
|
||||
std::swap(Succ0, Succ1);
|
||||
|
||||
// Succ0 is our candidate for CmpBB.
|
||||
if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
|
||||
return false;
|
||||
|
||||
CmpBB = Succ0;
|
||||
Tail = Succ1;
|
||||
|
||||
if (!CmpBB->isSuccessor(Tail))
|
||||
return false;
|
||||
|
||||
// The CFG topology checks out.
|
||||
DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#"
|
||||
<< CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n');
|
||||
++NumConsidered;
|
||||
|
||||
// Tail is allowed to have many predecessors, but we can't handle PHIs yet.
|
||||
//
|
||||
// FIXME: Real PHIs could be if-converted as long as the CmpBB values are
|
||||
// defined before the CmpBB cmp clobbers the flags. Alternatively, it should
|
||||
// always be safe to sink the ccmp down to immediately before the CmpBB
|
||||
// terminators.
|
||||
if (!trivialTailPHIs()) {
|
||||
DEBUG(dbgs() << "Can't handle phis in Tail.\n");
|
||||
++NumPhiRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Tail->livein_empty()) {
|
||||
DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
|
||||
++NumPhysRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
// CmpBB should never have PHIs since Head is its only predecessor.
|
||||
// FIXME: Clean them up if it happens.
|
||||
if (!CmpBB->empty() && CmpBB->front().isPHI()) {
|
||||
DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
|
||||
++NumPhi2Rejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!CmpBB->livein_empty()) {
|
||||
DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
|
||||
++NumPhysRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
// The branch we're looking to eliminate must be analyzable.
|
||||
HeadCond.clear();
|
||||
MachineBasicBlock *TBB = 0, *FBB = 0;
|
||||
if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) {
|
||||
DEBUG(dbgs() << "Head branch not analyzable.\n");
|
||||
++NumHeadBranchRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
// This is weird, probably some sort of degenerate CFG, or an edge to a
|
||||
// landing pad.
|
||||
if (!TBB || HeadCond.empty()) {
|
||||
DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
|
||||
++NumHeadBranchRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!parseCond(HeadCond, HeadCmpBBCC)) {
|
||||
DEBUG(dbgs() << "Unsupported branch type on Head\n");
|
||||
++NumHeadBranchRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Make sure the branch direction is right.
|
||||
if (TBB != CmpBB) {
|
||||
assert(TBB == Tail && "Unexpected TBB");
|
||||
HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC);
|
||||
}
|
||||
|
||||
CmpBBCond.clear();
|
||||
TBB = FBB = 0;
|
||||
if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
|
||||
DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
|
||||
++NumCmpBranchRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!TBB || CmpBBCond.empty()) {
|
||||
DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
|
||||
++NumCmpBranchRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!parseCond(CmpBBCond, CmpBBTailCC)) {
|
||||
DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
|
||||
++NumCmpBranchRejs;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (TBB != Tail)
|
||||
CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC);
|
||||
|
||||
DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC)
|
||||
<< ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC)
|
||||
<< '\n');
|
||||
|
||||
CmpMI = findConvertibleCompare(CmpBB);
|
||||
if (!CmpMI)
|
||||
return false;
|
||||
|
||||
if (!canSpeculateInstrs(CmpBB, CmpMI)) {
|
||||
++NumSpeculateRejs;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
|
||||
DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
|
||||
<< Head->getNumber() << ":\n" << *CmpBB);
|
||||
|
||||
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
|
||||
// Update the CFG first.
|
||||
updateTailPHIs();
|
||||
Head->removeSuccessor(CmpBB);
|
||||
CmpBB->removeSuccessor(Tail);
|
||||
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
|
||||
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
|
||||
TII->RemoveBranch(*Head);
|
||||
|
||||
// If the Head terminator was one of the cbz / tbz branches with built-in
|
||||
// compare, we need to insert an explicit compare instruction in its place.
|
||||
if (HeadCond[0].getImm() == -1) {
|
||||
++NumCompBranches;
|
||||
unsigned Opc = 0;
|
||||
switch (HeadCond[1].getImm()) {
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBNZW:
|
||||
Opc = ARM64::SUBSWri;
|
||||
break;
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZX:
|
||||
Opc = ARM64::SUBSXri;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Cannot convert Head branch");
|
||||
}
|
||||
const MCInstrDesc &MCID = TII->get(Opc);
|
||||
// Create a dummy virtual register for the SUBS def.
|
||||
unsigned DestReg =
|
||||
MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
|
||||
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
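// SUBS Rn, #0 sets Z exactly when Rn is zero, so the EQ/NE condition
// derived from the cbz / cbnz by parseCond() keeps its meaning.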
|
||||
BuildMI(*Head, Head->end(), TermDL, MCID)
|
||||
.addReg(DestReg, RegState::Define | RegState::Dead)
|
||||
.addOperand(HeadCond[2])
|
||||
.addImm(0)
|
||||
.addImm(0);
|
||||
// SUBS uses the GPR*sp register classes.
|
||||
MRI->constrainRegClass(HeadCond[2].getReg(),
|
||||
TII->getRegClass(MCID, 1, TRI, *MF));
|
||||
}
|
||||
|
||||
Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
|
||||
|
||||
// Now replace CmpMI with a ccmp instruction that also considers the incoming
|
||||
// flags.
|
||||
unsigned Opc = 0;
|
||||
unsigned FirstOp = 1; // First CmpMI operand to copy.
|
||||
bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
|
||||
switch (CmpMI->getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Unknown compare opcode");
|
||||
case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break;
|
||||
case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break;
|
||||
case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break;
|
||||
case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break;
|
||||
case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break;
|
||||
case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break;
|
||||
case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break;
|
||||
case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break;
|
||||
case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break;
|
||||
case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break;
|
||||
case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break;
|
||||
case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break;
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBNZW:
|
||||
Opc = ARM64::CCMPWi;
|
||||
FirstOp = 0;
|
||||
isZBranch = true;
|
||||
break;
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZX:
|
||||
Opc = ARM64::CCMPXi;
|
||||
FirstOp = 0;
|
||||
isZBranch = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// The ccmp instruction should set the flags according to the comparison when
|
||||
// Head would have branched to CmpBB.
|
||||
// The NZCV immediate operand should provide flags for the case where Head
|
||||
// would have branched to Tail. These flags should cause the new Head
|
||||
// terminator to branch to Tail.
|
||||
unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
|
||||
const MCInstrDesc &MCID = TII->get(Opc);
|
||||
MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
|
||||
TII->getRegClass(MCID, 0, TRI, *MF));
|
||||
if (CmpMI->getOperand(FirstOp + 1).isReg())
|
||||
MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
|
||||
TII->getRegClass(MCID, 1, TRI, *MF));
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
|
||||
.addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
|
||||
if (isZBranch)
|
||||
MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
|
||||
else
|
||||
MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
|
||||
MIB.addImm(NZCV).addImm(HeadCmpBBCC);
|
||||
|
||||
// If CmpMI was a terminator, we need a new conditional branch to replace it.
|
||||
// This now becomes a Head terminator.
|
||||
if (isZBranch) {
|
||||
bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW ||
|
||||
CmpMI->getOpcode() == ARM64::CBNZX;
|
||||
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc))
|
||||
.addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ)
|
||||
.addOperand(CmpMI->getOperand(1)); // Branch target.
|
||||
}
|
||||
CmpMI->eraseFromParent();
|
||||
Head->updateTerminator();
|
||||
|
||||
RemovedBlocks.push_back(CmpBB);
|
||||
CmpBB->eraseFromParent();
|
||||
DEBUG(dbgs() << "Result:\n" << *Head);
|
||||
++NumConverted;
|
||||
}
|
||||
|
||||
int SSACCmpConv::expectedCodeSizeDelta() const {
|
||||
int delta = 0;
|
||||
// If the Head terminator was one of the cbz / tbz branches with built-in
|
||||
// compare, we need to insert an explicit compare instruction in its place
|
||||
// plus a branch instruction.
|
||||
if (HeadCond[0].getImm() == -1) {
|
||||
switch (HeadCond[1].getImm()) {
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZX:
|
||||
// Therefore delta += 1
|
||||
delta = 1;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Cannot convert Head branch");
|
||||
}
|
||||
}
|
||||
// If the Cmp terminator was one of the cbz / tbz branches with
|
||||
// built-in compare, it will be turned into a compare instruction
|
||||
// in Head, but we do not save any instruction.
|
||||
// Otherwise, we save the branch instruction.
|
||||
switch (CmpMI->getOpcode()) {
|
||||
default:
|
||||
--delta;
|
||||
break;
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZX:
|
||||
break;
|
||||
}
|
||||
return delta;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM64ConditionalCompares Pass
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
class ARM64ConditionalCompares : public MachineFunctionPass {
|
||||
const TargetInstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
const MCSchedModel *SchedModel;
|
||||
// Does the function being processed have the MinSize (Oz) attribute?
|
||||
bool MinSize;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineDominatorTree *DomTree;
|
||||
MachineLoopInfo *Loops;
|
||||
MachineTraceMetrics *Traces;
|
||||
MachineTraceMetrics::Ensemble *MinInstr;
|
||||
SSACCmpConv CmpConv;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
ARM64ConditionalCompares() : MachineFunctionPass(ID) {}
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||
bool runOnMachineFunction(MachineFunction &MF);
|
||||
const char *getPassName() const { return "ARM64 Conditional Compares"; }
|
||||
|
||||
private:
|
||||
bool tryConvert(MachineBasicBlock *);
|
||||
void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
|
||||
void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
|
||||
void invalidateTraces();
|
||||
bool shouldConvert();
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
char ARM64ConditionalCompares::ID = 0;
|
||||
|
||||
namespace llvm {
|
||||
void initializeARM64ConditionalComparesPass(PassRegistry &);
|
||||
}
|
||||
|
||||
INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
|
||||
false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
|
||||
INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
|
||||
false, false)
|
||||
|
||||
FunctionPass *llvm::createARM64ConditionalCompares() {
|
||||
return new ARM64ConditionalCompares();
|
||||
}
|
||||
|
||||
void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
AU.addRequired<MachineLoopInfo>();
|
||||
AU.addPreserved<MachineLoopInfo>();
|
||||
AU.addRequired<MachineTraceMetrics>();
|
||||
AU.addPreserved<MachineTraceMetrics>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
/// Update the dominator tree after if-conversion erased some blocks.
|
||||
void
|
||||
ARM64ConditionalCompares::updateDomTree(ArrayRef<MachineBasicBlock *> Removed) {
|
||||
// convert() removes CmpBB which was previously dominated by Head.
|
||||
// CmpBB children should be transferred to Head.
|
||||
MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
|
||||
for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
|
||||
MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
|
||||
assert(Node != HeadNode && "Cannot erase the head node");
|
||||
assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
|
||||
while (Node->getNumChildren())
|
||||
DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
|
||||
DomTree->eraseNode(Removed[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/// Update LoopInfo after if-conversion.
|
||||
void
|
||||
ARM64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
|
||||
if (!Loops)
|
||||
return;
|
||||
for (unsigned i = 0, e = Removed.size(); i != e; ++i)
|
||||
Loops->removeBlock(Removed[i]);
|
||||
}
|
||||
|
||||
/// Invalidate MachineTraceMetrics before if-conversion.
|
||||
void ARM64ConditionalCompares::invalidateTraces() {
|
||||
Traces->invalidate(CmpConv.Head);
|
||||
Traces->invalidate(CmpConv.CmpBB);
|
||||
}
|
||||
|
||||
/// Apply cost model and heuristics to the if-conversion in IfConv.
|
||||
/// Return true if the conversion is a good idea.
|
||||
///
|
||||
bool ARM64ConditionalCompares::shouldConvert() {
|
||||
// Stress testing mode disables all cost considerations.
|
||||
if (Stress)
|
||||
return true;
|
||||
if (!MinInstr)
|
||||
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
|
||||
|
||||
// Head dominates CmpBB, so it is always included in its trace.
|
||||
MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
|
||||
|
||||
// If code size is the main concern, check the expected code size delta first.
|
||||
if (MinSize) {
|
||||
int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
|
||||
DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
|
||||
// If we are minimizing the code size, do the conversion whatever
|
||||
// the cost is.
|
||||
if (CodeSizeDelta < 0)
|
||||
return true;
|
||||
if (CodeSizeDelta > 0) {
|
||||
DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
|
||||
return false;
|
||||
}
|
||||
// CodeSizeDelta == 0, continue with the regular heuristics
|
||||
}
|
||||
|
||||
// Heuristic: The compare conversion delays the execution of the branch
|
||||
// instruction because we must wait for the inputs to the second compare as
|
||||
// well. The branch has no dependent instructions, but delaying it increases
|
||||
// the cost of a misprediction.
|
||||
//
|
||||
// Set a limit on the delay we will accept.
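// For example, with a (hypothetical) misprediction penalty of 12 cycles the
// accepted extra delay is 12 * 3 / 4 = 9 cycles.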
|
||||
unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4;
|
||||
|
||||
// Instruction depths can be computed for all trace instructions above CmpBB.
|
||||
unsigned HeadDepth =
|
||||
Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth;
|
||||
unsigned CmpBBDepth =
|
||||
Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth;
|
||||
DEBUG(dbgs() << "Head depth: " << HeadDepth
|
||||
<< "\nCmpBB depth: " << CmpBBDepth << '\n');
|
||||
if (CmpBBDepth > HeadDepth + DelayLimit) {
|
||||
DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
|
||||
<< " cycles.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check the resource depth at the bottom of CmpBB - these instructions will
|
||||
// be speculated.
|
||||
unsigned ResDepth = Trace.getResourceDepth(true);
|
||||
DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
|
||||
|
||||
// Heuristic: The speculatively executed instructions must all be able to
|
||||
// merge into the Head block. The Head critical path should dominate the
|
||||
// resource cost of the speculated instructions.
|
||||
if (ResDepth > HeadDepth) {
|
||||
DEBUG(dbgs() << "Too many instructions to speculate.\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
|
||||
bool Changed = false;
|
||||
while (CmpConv.canConvert(MBB) && shouldConvert()) {
|
||||
invalidateTraces();
|
||||
SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
|
||||
CmpConv.convert(RemovedBlocks);
|
||||
Changed = true;
|
||||
updateDomTree(RemovedBlocks);
|
||||
updateLoops(RemovedBlocks);
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
|
||||
DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n"
|
||||
<< "********** Function: " << MF.getName() << '\n');
|
||||
TII = MF.getTarget().getInstrInfo();
|
||||
TRI = MF.getTarget().getRegisterInfo();
|
||||
SchedModel =
|
||||
MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
|
||||
MRI = &MF.getRegInfo();
|
||||
DomTree = &getAnalysis<MachineDominatorTree>();
|
||||
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
|
||||
Traces = &getAnalysis<MachineTraceMetrics>();
|
||||
MinInstr = 0;
|
||||
MinSize = MF.getFunction()->getAttributes().hasAttribute(
|
||||
AttributeSet::FunctionIndex, Attribute::MinSize);
|
||||
|
||||
bool Changed = false;
|
||||
CmpConv.runOnMachineFunction(MF);
|
||||
|
||||
// Visit blocks in dominator tree pre-order. The pre-order enables multiple
|
||||
// cmp-conversions from the same head block.
|
||||
// Note that updateDomTree() modifies the children of the DomTree node
|
||||
// currently being visited. The df_iterator supports that, it doesn't look at
|
||||
// child_begin() / child_end() until after a node has been visited.
|
||||
for (df_iterator<MachineDominatorTree *> I = df_begin(DomTree),
|
||||
E = df_end(DomTree);
|
||||
I != E; ++I)
|
||||
if (tryConvert(I->getBlock()))
|
||||
Changed = true;
|
||||
|
||||
return Changed;
|
||||
}
|
104
lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp
Normal file
@ -0,0 +1,104 @@
|
||||
//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// When allowed by the instruction, replace a dead definition of a GPR with
|
||||
// the zero register. This makes the code a bit friendlier towards the
|
||||
// hardware's register renamer.
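// For example, a "subs w0, w1, #5" whose w0 result is never read can be
// rewritten as "subs wzr, w1, #5", i.e. a plain "cmp w1, #5".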
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-dead-defs"
|
||||
#include "ARM64.h"
|
||||
#include "ARM64RegisterInfo.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
|
||||
|
||||
namespace {
|
||||
class ARM64DeadRegisterDefinitions : public MachineFunctionPass {
|
||||
private:
|
||||
bool processMachineBasicBlock(MachineBasicBlock *MBB);
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid.
|
||||
explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &F);
|
||||
|
||||
const char *getPassName() const { return "Dead register definitions"; }
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
char ARM64DeadRegisterDefinitions::ID = 0;
|
||||
} // end anonymous namespace
|
||||
|
||||
bool
|
||||
ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock *MBB) {
|
||||
bool Changed = false;
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
|
||||
++I) {
|
||||
MachineInstr *MI = I;
|
||||
for (int i = 0, e = MI->getDesc().getNumDefs(); i != e; ++i) {
|
||||
MachineOperand &MO = MI->getOperand(i);
|
||||
if (MO.isReg() && MO.isDead() && MO.isDef()) {
|
||||
assert(!MO.isImplicit() && "Unexpected implicit def!");
|
||||
DEBUG(dbgs() << " Dead def operand #" << i << " in:\n ";
|
||||
MI->print(dbgs()));
|
||||
// Be careful not to change the register if it's a tied operand.
|
||||
if (MI->isRegTiedToUseOperand(i)) {
|
||||
DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
|
||||
continue;
|
||||
}
|
||||
// Make sure the instruction takes a register class that contains
|
||||
// the zero register and replace it if so.
|
||||
unsigned NewReg;
|
||||
switch (MI->getDesc().OpInfo[i].RegClass) {
|
||||
default:
|
||||
DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
|
||||
continue;
|
||||
case ARM64::GPR32RegClassID:
|
||||
NewReg = ARM64::WZR;
|
||||
break;
|
||||
case ARM64::GPR64RegClassID:
|
||||
NewReg = ARM64::XZR;
|
||||
break;
|
||||
}
|
||||
DEBUG(dbgs() << " Replacing with zero register. New:\n ");
|
||||
MO.setReg(NewReg);
|
||||
DEBUG(MI->print(dbgs()));
|
||||
++NumDeadDefsReplaced;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Scan the function for instructions that have a dead definition of a
|
||||
// register. Replace that register with the zero register when possible.
|
||||
bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &mf) {
|
||||
MachineFunction *MF = &mf;
|
||||
bool Changed = false;
|
||||
DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n");
|
||||
|
||||
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
|
||||
if (processMachineBasicBlock(I))
|
||||
Changed = true;
|
||||
return Changed;
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createARM64DeadRegisterDefinitions() {
|
||||
return new ARM64DeadRegisterDefinitions();
|
||||
}
|
726
lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp
Normal file
@ -0,0 +1,726 @@
|
||||
//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a pass that expands pseudo instructions into target
|
||||
// instructions to allow proper scheduling and other late optimizations. This
|
||||
// pass should be run after register allocation but before the post-regalloc
|
||||
// scheduling pass.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "MCTargetDesc/ARM64AddressingModes.h"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class ARM64ExpandPseudo : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
ARM64ExpandPseudo() : MachineFunctionPass(ID) {}
|
||||
|
||||
const ARM64InstrInfo *TII;
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &Fn);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM64 pseudo instruction expansion pass";
|
||||
}
|
||||
|
||||
private:
|
||||
bool expandMBB(MachineBasicBlock &MBB);
|
||||
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
|
||||
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
unsigned BitSize);
|
||||
};
|
||||
char ARM64ExpandPseudo::ID = 0;
|
||||
}
|
||||
|
||||
/// \brief Transfer implicit operands on the pseudo instruction to the
|
||||
/// instructions created from the expansion.
|
||||
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
|
||||
MachineInstrBuilder &DefMI) {
|
||||
const MCInstrDesc &Desc = OldMI.getDesc();
|
||||
for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
|
||||
++i) {
|
||||
const MachineOperand &MO = OldMI.getOperand(i);
|
||||
assert(MO.isReg() && MO.getReg());
|
||||
if (MO.isUse())
|
||||
UseMI.addOperand(MO);
|
||||
else
|
||||
DefMI.addOperand(MO);
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Helper function which extracts the specified 16-bit chunk from a
|
||||
/// 64-bit value.
|
||||
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
|
||||
assert(ChunkIdx < 4 && "Out of range chunk index specified!");
|
||||
|
||||
return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
|
||||
}
|
||||
|
||||
/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
|
||||
/// value. Indices correspond to element numbers in a v4i16.
|
||||
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
|
||||
assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
|
||||
const unsigned ShiftAmt = ToIdx * 16;
|
||||
|
||||
// Replicate the source chunk to the destination position.
|
||||
const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
|
||||
// Clear the destination chunk.
|
||||
Imm &= ~(0xFFFFLL << ShiftAmt);
|
||||
// Insert the replicated chunk.
|
||||
return Imm | Chunk;
|
||||
}
|
||||
|
||||
/// \brief Helper function which tries to materialize a 64-bit value with an
|
||||
/// ORR + MOVK instruction sequence.
|
||||
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
const ARM64InstrInfo *TII, unsigned ChunkIdx) {
|
||||
assert(ChunkIdx < 4 && "Out of range chunk index specified!");
|
||||
const unsigned ShiftAmt = ChunkIdx * 16;
|
||||
|
||||
uint64_t Encoding;
|
||||
if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
|
||||
// Create the ORR-immediate instruction.
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(ARM64::XZR)
|
||||
.addImm(Encoding);
|
||||
|
||||
// Create the MOVK instruction.
|
||||
const unsigned Imm16 = getChunk(UImm, ChunkIdx);
|
||||
const unsigned DstReg = MI.getOperand(0).getReg();
|
||||
const bool DstIsDead = MI.getOperand(0).isDead();
|
||||
MachineInstrBuilder MIB1 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
|
||||
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(DstReg)
|
||||
.addImm(Imm16)
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
|
||||
|
||||
transferImpOps(MI, MIB, MIB1);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
|
||||
/// can be materialized with an ORR instruction.
|
||||
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
|
||||
Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
|
||||
|
||||
return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding);
|
||||
}
|
||||
|
||||
/// \brief Check for identical 16-bit chunks within the constant and if so
|
||||
/// materialize them with a single ORR instruction. The remaining one or two
|
||||
/// 16-bit chunks will be materialized with MOVK instructions.
|
||||
///
|
||||
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
|
||||
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
|
||||
/// an ORR instruction.
|
||||
///
|
||||
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
const ARM64InstrInfo *TII) {
|
||||
typedef DenseMap<uint64_t, unsigned> CountMap;
|
||||
CountMap Counts;
|
||||
|
||||
// Scan the constant and count how often every chunk occurs.
|
||||
for (unsigned Idx = 0; Idx < 4; ++Idx)
|
||||
++Counts[getChunk(UImm, Idx)];
|
||||
|
||||
// Traverse the chunks to find one which occurs more than once.
|
||||
for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
|
||||
Chunk != End; ++Chunk) {
|
||||
const uint64_t ChunkVal = Chunk->first;
|
||||
const unsigned Count = Chunk->second;
|
||||
|
||||
uint64_t Encoding = 0;
|
||||
|
||||
// We are looking for chunks which have two or three instances and can be
|
||||
// materialized with an ORR instruction.
|
||||
if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
|
||||
continue;
|
||||
|
||||
const bool CountThree = Count == 3;
|
||||
// Create the ORR-immediate instruction.
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(ARM64::XZR)
|
||||
.addImm(Encoding);
|
||||
|
||||
const unsigned DstReg = MI.getOperand(0).getReg();
|
||||
const bool DstIsDead = MI.getOperand(0).isDead();
|
||||
|
||||
unsigned ShiftAmt = 0;
|
||||
uint64_t Imm16 = 0;
|
||||
// Find the first chunk not materialized with the ORR instruction.
|
||||
for (; ShiftAmt < 64; ShiftAmt += 16) {
|
||||
Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
|
||||
|
||||
if (Imm16 != ChunkVal)
|
||||
break;
|
||||
}
|
||||
|
||||
// Create the first MOVK instruction.
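// This MOVK completes the expansion only when the constant had three
// identical chunks, so only then may it inherit the dead flag.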
|
||||
MachineInstrBuilder MIB1 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
|
||||
.addReg(DstReg,
|
||||
RegState::Define | getDeadRegState(DstIsDead && CountThree))
|
||||
.addReg(DstReg)
|
||||
.addImm(Imm16)
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
|
||||
|
||||
// In case we have three instances the whole constant is now materialized
|
||||
// and we can exit.
|
||||
if (CountThree) {
|
||||
transferImpOps(MI, MIB, MIB1);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Find the remaining chunk which needs to be materialized.
|
||||
for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
|
||||
Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
|
||||
|
||||
if (Imm16 != ChunkVal)
|
||||
break;
|
||||
}
|
||||
|
||||
// Create the second MOVK instruction.
|
||||
MachineInstrBuilder MIB2 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
|
||||
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(DstReg)
|
||||
.addImm(Imm16)
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
|
||||
|
||||
transferImpOps(MI, MIB, MIB2);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
|
||||
/// starts a contiguous sequence of ones if we look at the bits from the LSB
|
||||
/// towards the MSB.
|
||||
static bool isStartChunk(uint64_t Chunk) {
|
||||
if (Chunk == 0 || Chunk == UINT64_MAX)
|
||||
return false;
|
||||
|
||||
return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
|
||||
}
|
||||
|
||||
/// \brief Check whether this chunk matches the pattern '0...1...' This pattern
|
||||
/// ends a contiguous sequence of ones if we look at the bits from the LSB
|
||||
/// towards the MSB.
|
||||
static bool isEndChunk(uint64_t Chunk) {
|
||||
if (Chunk == 0 || Chunk == UINT64_MAX)
|
||||
return false;
|
||||
|
||||
return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
|
||||
}
|
||||
|
||||
/// \brief Clear or set all bits in the chunk at the given index.
|
||||
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
|
||||
const uint64_t Mask = 0xFFFF;
|
||||
|
||||
if (Clear)
|
||||
// Clear chunk in the immediate.
|
||||
Imm &= ~(Mask << (Idx * 16));
|
||||
else
|
||||
// Set all bits in the immediate for the particular chunk.
|
||||
Imm |= Mask << (Idx * 16);
|
||||
|
||||
return Imm;
|
||||
}
|
||||
|
||||
/// \brief Check whether the constant contains a sequence of contiguous ones,
|
||||
/// which might be interrupted by one or two chunks. If so, materialize the
|
||||
/// sequence of contiguous ones with an ORR instruction.
|
||||
/// Materialize the chunks which are either interrupting the sequence or outside
|
||||
/// of the sequence with a MOVK instruction.
|
||||
///
|
||||
/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
|
||||
/// which ends the sequence (0...1...). Then we are looking for constants which
|
||||
/// contain at least one S and E chunk.
|
||||
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
|
||||
///
|
||||
/// We are also looking for constants like |S|A|B|E| where the contiguous
|
||||
/// sequence of ones wraps around the MSB into the LSB.
|
||||
///
|
||||
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
const ARM64InstrInfo *TII) {
|
||||
const int NotSet = -1;
|
||||
const uint64_t Mask = 0xFFFF;
|
||||
|
||||
int StartIdx = NotSet;
|
||||
int EndIdx = NotSet;
|
||||
// Try to find the chunks which start/end a contiguous sequence of ones.
|
||||
for (int Idx = 0; Idx < 4; ++Idx) {
|
||||
int64_t Chunk = getChunk(UImm, Idx);
|
||||
// Sign extend the 16-bit chunk to 64-bit.
|
||||
Chunk = (Chunk << 48) >> 48;
|
||||
|
||||
if (isStartChunk(Chunk))
|
||||
StartIdx = Idx;
|
||||
else if (isEndChunk(Chunk))
|
||||
EndIdx = Idx;
|
||||
}
|
||||
|
||||
// Early exit in case we can't find a start/end chunk.
|
||||
if (StartIdx == NotSet || EndIdx == NotSet)
|
||||
return false;
|
||||
|
||||
// Outside of the contiguous sequence of ones everything needs to be zero.
|
||||
uint64_t Outside = 0;
|
||||
// Chunks between the start and end chunk need to have all their bits set.
|
||||
uint64_t Inside = Mask;
|
||||
|
||||
// If our contiguous sequence of ones wraps around from the MSB into the LSB,
|
||||
// just swap indices and pretend we are materializing a contiguous sequence
|
||||
// of zeros surrounded by a contiguous sequence of ones.
|
||||
if (StartIdx > EndIdx) {
|
||||
std::swap(StartIdx, EndIdx);
|
||||
std::swap(Outside, Inside);
|
||||
}
|
||||
|
||||
uint64_t OrrImm = UImm;
|
||||
int FirstMovkIdx = NotSet;
|
||||
int SecondMovkIdx = NotSet;
|
||||
|
||||
// Find out which chunks we need to patch up to obtain a contiguous sequence
|
||||
// of ones.
|
||||
for (int Idx = 0; Idx < 4; ++Idx) {
|
||||
const uint64_t Chunk = getChunk(UImm, Idx);
|
||||
|
||||
// Check whether we are looking at a chunk which is not part of the
|
||||
// contiguous sequence of ones.
|
||||
if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
|
||||
OrrImm = updateImm(OrrImm, Idx, Outside == 0);
|
||||
|
||||
// Remember the index we need to patch.
|
||||
if (FirstMovkIdx == NotSet)
|
||||
FirstMovkIdx = Idx;
|
||||
else
|
||||
SecondMovkIdx = Idx;
|
||||
|
||||
// Check whether we are looking at a chunk which is part of the contiguous
|
||||
// sequence of ones.
|
||||
} else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
|
||||
OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
|
||||
|
||||
// Remember the index we need to patch.
|
||||
if (FirstMovkIdx == NotSet)
|
||||
FirstMovkIdx = Idx;
|
||||
else
|
||||
SecondMovkIdx = Idx;
|
||||
}
|
||||
}
|
||||
assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
|
||||
|
||||
// Create the ORR-immediate instruction.
|
||||
uint64_t Encoding = 0;
|
||||
ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(ARM64::XZR)
|
||||
.addImm(Encoding);
|
||||
|
||||
const unsigned DstReg = MI.getOperand(0).getReg();
|
||||
const bool DstIsDead = MI.getOperand(0).isDead();
|
||||
|
||||
const bool SingleMovk = SecondMovkIdx == NotSet;
|
||||
// Create the first MOVK instruction.
|
||||
MachineInstrBuilder MIB1 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
|
||||
.addReg(DstReg,
|
||||
RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
|
||||
.addReg(DstReg)
|
||||
.addImm(getChunk(UImm, FirstMovkIdx))
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16));
|
||||
|
||||
// Early exit in case we only need to emit a single MOVK instruction.
|
||||
if (SingleMovk) {
|
||||
transferImpOps(MI, MIB, MIB1);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create the second MOVK instruction.
|
||||
MachineInstrBuilder MIB2 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
|
||||
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(DstReg)
|
||||
.addImm(getChunk(UImm, SecondMovkIdx))
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16));
|
||||
|
||||
transferImpOps(MI, MIB, MIB2);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
|
||||
/// real move-immediate instructions to synthesize the immediate.
|
||||
bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
unsigned BitSize) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
uint64_t Imm = MI.getOperand(1).getImm();
|
||||
const unsigned Mask = 0xFFFF;
|
||||
|
||||
// Try a MOVI instruction (aka ORR-immediate with the zero register).
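// First mask the immediate down to BitSize bits: the shift pair below is a
// no-op for 64-bit values and zero-extends 32-bit ones into UImm.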
|
||||
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
|
||||
uint64_t Encoding;
|
||||
if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
|
||||
unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR)
|
||||
.addImm(Encoding);
|
||||
transferImpOps(MI, MIB, MIB);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Scan the immediate and count the number of 16-bit chunks which are either
|
||||
// all ones or all zeros.
|
||||
unsigned OneChunks = 0;
|
||||
unsigned ZeroChunks = 0;
|
||||
for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
|
||||
const unsigned Chunk = (Imm >> Shift) & Mask;
|
||||
if (Chunk == Mask)
|
||||
OneChunks++;
|
||||
else if (Chunk == 0)
|
||||
ZeroChunks++;
|
||||
}
|
||||
|
||||
// Since we can't materialize the constant with a single ORR instruction,
|
||||
// let's see whether we can materialize 3/4 of the constant with an ORR
|
||||
// instruction and use an additional MOVK instruction to materialize the
|
||||
// remaining 1/4.
|
||||
//
|
||||
// We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
|
||||
//
|
||||
// E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
|
||||
// we would create the following instruction sequence:
|
||||
//
|
||||
// ORR x0, xzr, |A|X|A|X|
|
||||
// MOVK x0, |B|, LSL #16
|
||||
//
|
||||
// Only look at 64-bit constants which can't be materialized with a single
|
||||
// instruction, i.e. which have fewer than three all-zero or all-one
|
||||
// chunks.
|
||||
//
|
||||
// Ignore 32-bit constants here, they always can be materialized with a
|
||||
// MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
|
||||
// with a single ORR, the best sequence we can achieve is a ORR + MOVK pair.
|
||||
// Thus we fall back to the default code below which in the best case creates
|
||||
// a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
|
||||
//
|
||||
if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
|
||||
// If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
|
||||
// identical?
|
||||
if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
|
||||
// See if we can come up with a constant which can be materialized with
|
||||
// ORR-immediate by replicating element 3 into element 1.
|
||||
uint64_t OrrImm = replicateChunk(UImm, 3, 1);
|
||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
|
||||
return true;
|
||||
|
||||
// See if we can come up with a constant which can be materialized with
|
||||
// ORR-immediate by replicating element 1 into element 3.
|
||||
OrrImm = replicateChunk(UImm, 1, 3);
|
||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
|
||||
return true;
|
||||
|
||||
// If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
|
||||
// identical?
|
||||
} else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
|
||||
// See if we can come up with a constant which can be materialized with
|
||||
// ORR-immediate by replicating element 2 into element 0.
|
||||
uint64_t OrrImm = replicateChunk(UImm, 2, 0);
|
||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
|
||||
return true;
|
||||
|
||||
// See if we can come up with a constant which can be materialized with
|
||||
// ORR-immediate by replicating element 0 into element 2.
|
||||
OrrImm = replicateChunk(UImm, 0, 2);
|
||||
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for identical 16-bit chunks within the constant and if so materialize
|
||||
// them with a single ORR instruction. The remaining one or two 16-bit chunks
|
||||
// will be materialized with MOVK instructions.
|
||||
if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
|
||||
return true;
|
||||
|
||||
// Check whether the constant contains a sequence of contiguous ones, which
|
||||
// might be interrupted by one or two chunks. If so, materialize the sequence
|
||||
// of contiguous ones with an ORR instruction. Materialize the chunks which
|
||||
// are either interrupting the sequence or outside of the sequence with a
|
||||
// MOVK instruction.
|
||||
if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
|
||||
return true;
|
||||
|
||||
// Use a MOVZ or MOVN instruction to set the high bits, followed by one or
|
||||
// more MOVK instructions to insert additional 16-bit portions into the
|
||||
// lower bits.
|
||||
bool isNeg = false;
|
||||
|
||||
// Use MOVN to materialize the high bits if we have more all one chunks
|
||||
// than all zero chunks.
|
||||
if (OneChunks > ZeroChunks) {
|
||||
isNeg = true;
|
||||
Imm = ~Imm;
|
||||
}
|
||||
|
||||
unsigned FirstOpc;
|
||||
if (BitSize == 32) {
|
||||
Imm &= (1LL << 32) - 1;
|
||||
FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi);
|
||||
} else {
|
||||
FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi);
|
||||
}
|
||||
unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
|
||||
unsigned LastShift = 0; // LSL amount for last MOVK
|
||||
if (Imm != 0) {
|
||||
unsigned LZ = countLeadingZeros(Imm);
|
||||
unsigned TZ = countTrailingZeros(Imm);
|
||||
Shift = ((63 - LZ) / 16) * 16;
|
||||
LastShift = (TZ / 16) * 16;
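// Shift now addresses the highest 16-bit chunk containing a set bit and
// LastShift the lowest; chunks outside that range are already correct.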
|
||||
}
|
||||
unsigned Imm16 = (Imm >> Shift) & Mask;
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
bool DstIsDead = MI.getOperand(0).isDead();
|
||||
MachineInstrBuilder MIB1 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
|
||||
.addReg(DstReg, RegState::Define |
|
||||
getDeadRegState(DstIsDead && Shift == LastShift))
|
||||
.addImm(Imm16)
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
|
||||
|
||||
// If a MOVN was used for the high bits of a negative value, flip the rest
|
||||
// of the bits back for use with MOVK.
|
||||
if (isNeg)
|
||||
Imm = ~Imm;
|
||||
|
||||
if (Shift == LastShift) {
|
||||
transferImpOps(MI, MIB1, MIB1);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB2;
|
||||
unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi);
|
||||
while (Shift != LastShift) {
|
||||
Shift -= 16;
|
||||
Imm16 = (Imm >> Shift) & Mask;
|
||||
if (Imm16 == (isNeg ? Mask : 0))
|
||||
continue; // This 16-bit portion is already set correctly.
|
||||
MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
|
||||
.addReg(DstReg,
|
||||
RegState::Define |
|
||||
getDeadRegState(DstIsDead && Shift == LastShift))
|
||||
.addReg(DstReg)
|
||||
.addImm(Imm16)
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
|
||||
}
|
||||
|
||||
transferImpOps(MI, MIB1, MIB2);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief If MBBI references a pseudo instruction that should be expanded here,
|
||||
/// do the expansion and return true. Otherwise return false.
|
||||
bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
switch (Opcode) {
|
||||
default:
|
||||
break;
|
||||
|
||||
case ARM64::ADDWrr:
|
||||
case ARM64::SUBWrr:
|
||||
case ARM64::ADDXrr:
|
||||
case ARM64::SUBXrr:
|
||||
case ARM64::ADDSWrr:
|
||||
case ARM64::SUBSWrr:
|
||||
case ARM64::ADDSXrr:
|
||||
case ARM64::SUBSXrr:
|
||||
case ARM64::ANDWrr:
|
||||
case ARM64::ANDXrr:
|
||||
case ARM64::BICWrr:
|
||||
case ARM64::BICXrr:
|
||||
case ARM64::EONWrr:
|
||||
case ARM64::EONXrr:
|
||||
case ARM64::EORWrr:
|
||||
case ARM64::EORXrr:
|
||||
case ARM64::ORNWrr:
|
||||
case ARM64::ORNXrr:
|
||||
case ARM64::ORRWrr:
|
||||
case ARM64::ORRXrr: {
|
||||
unsigned Opcode;
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break;
|
||||
case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break;
|
||||
case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break;
|
||||
case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break;
|
||||
case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break;
|
||||
case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break;
|
||||
case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break;
|
||||
case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break;
|
||||
case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break;
|
||||
case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break;
|
||||
case ARM64::BICWrr: Opcode = ARM64::BICWrs; break;
|
||||
case ARM64::BICXrr: Opcode = ARM64::BICXrs; break;
|
||||
case ARM64::EONWrr: Opcode = ARM64::EONWrs; break;
|
||||
case ARM64::EONXrr: Opcode = ARM64::EONXrs; break;
|
||||
case ARM64::EORWrr: Opcode = ARM64::EORWrs; break;
|
||||
case ARM64::EORXrr: Opcode = ARM64::EORXrs; break;
|
||||
case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break;
|
||||
case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break;
|
||||
case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break;
|
||||
case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break;
|
||||
}
|
||||
MachineInstrBuilder MIB1 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
|
||||
MI.getOperand(0).getReg())
|
||||
.addOperand(MI.getOperand(1))
|
||||
.addOperand(MI.getOperand(2))
|
||||
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
|
||||
transferImpOps(MI, MIB1, MIB1);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
case ARM64::LOADgot: {
|
||||
// Expand into ADRP + LDR.
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
const MachineOperand &MO1 = MI.getOperand(1);
|
||||
unsigned Flags = MO1.getTargetFlags();
|
||||
MachineInstrBuilder MIB1 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg);
|
||||
MachineInstrBuilder MIB2 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(DstReg);
|
||||
|
||||
if (MO1.isGlobal()) {
|
||||
MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE);
|
||||
MIB2.addGlobalAddress(MO1.getGlobal(), 0,
|
||||
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
|
||||
} else if (MO1.isSymbol()) {
|
||||
MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE);
|
||||
MIB2.addExternalSymbol(MO1.getSymbolName(),
|
||||
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
|
||||
} else {
|
||||
assert(MO1.isCPI() &&
|
||||
"Only expect globals, externalsymbols, or constant pools");
|
||||
MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
|
||||
Flags | ARM64II::MO_PAGE);
|
||||
MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
|
||||
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
|
||||
}
|
||||
|
||||
transferImpOps(MI, MIB1, MIB2);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
case ARM64::MOVaddr:
|
||||
case ARM64::MOVaddrJT:
|
||||
case ARM64::MOVaddrCP:
|
||||
case ARM64::MOVaddrBA:
|
||||
case ARM64::MOVaddrTLS:
|
||||
case ARM64::MOVaddrEXT: {
|
||||
// Expand into ADRP + ADD.
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
MachineInstrBuilder MIB1 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg)
|
||||
.addOperand(MI.getOperand(1));
|
||||
|
||||
MachineInstrBuilder MIB2 =
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(DstReg)
|
||||
.addOperand(MI.getOperand(2))
|
||||
.addImm(0);
|
||||
|
||||
transferImpOps(MI, MIB1, MIB2);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
case ARM64::MOVi32imm:
|
||||
return expandMOVImm(MBB, MBBI, 32);
|
||||
case ARM64::MOVi64imm:
|
||||
return expandMOVImm(MBB, MBBI, 64);
|
||||
case ARM64::RET_ReallyLR:
|
||||
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET))
|
||||
.addReg(ARM64::LR);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Iterate over the instructions in basic block MBB and expand any
|
||||
/// pseudo instructions. Return true if anything was modified.
|
||||
bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
|
||||
bool Modified = false;
|
||||
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
|
||||
while (MBBI != E) {
|
||||
MachineBasicBlock::iterator NMBBI = std::next(MBBI);
|
||||
Modified |= expandMI(MBB, MBBI);
|
||||
MBBI = NMBBI;
|
||||
}
|
||||
|
||||
return Modified;
|
||||
}
|
||||
|
||||
bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
|
||||
|
||||
bool Modified = false;
|
||||
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
|
||||
++MFI)
|
||||
Modified |= expandMBB(*MFI);
|
||||
return Modified;
|
||||
}
|
||||
|
||||
/// \brief Returns an instance of the pseudo instruction expansion pass.
|
||||
FunctionPass *llvm::createARM64ExpandPseudoPass() {
|
||||
return new ARM64ExpandPseudo();
|
||||
}
|
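For reference, the address-materialization pseudos handled above expand into two-instruction ADRP-based sequences. Schematically (Darwin-style relocation syntax; illustrative rather than exact assembler output):

// MOVaddr xD, sym   ->   adrp xD, sym@PAGE
//                        add  xD, xD, sym@PAGEOFF
// LOADgot xD, sym   ->   adrp xD, sym@GOTPAGE
//                        ldr  xD, [xD, sym@GOTPAGEOFF]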
1929
lib/Target/ARM64/ARM64FastISel.cpp
Normal file
File diff suppressed because it is too large
818
lib/Target/ARM64/ARM64FrameLowering.cpp
Normal file
@ -0,0 +1,818 @@
|
||||
//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the ARM64 implementation of TargetFrameLowering class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "frame-info"
|
||||
#include "ARM64FrameLowering.h"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "ARM64MachineFunctionInfo.h"
|
||||
#include "ARM64Subtarget.h"
|
||||
#include "ARM64TargetMachine.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/RegisterScavenging.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> EnableRedZone("arm64-redzone",
|
||||
cl::desc("enable use of redzone on ARM64"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
|
||||
|
||||
static unsigned estimateStackSize(MachineFunction &MF) {
|
||||
const MachineFrameInfo *FFI = MF.getFrameInfo();
|
||||
int Offset = 0;
|
||||
for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
|
||||
int FixedOff = -FFI->getObjectOffset(i);
|
||||
if (FixedOff > Offset)
|
||||
Offset = FixedOff;
|
||||
}
|
||||
for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
|
||||
if (FFI->isDeadObjectIndex(i))
|
||||
continue;
|
||||
Offset += FFI->getObjectSize(i);
|
||||
unsigned Align = FFI->getObjectAlignment(i);
|
||||
// Adjust to alignment boundary
|
||||
Offset = (Offset + Align - 1) / Align * Align;
|
||||
}
|
||||
// This does not include the 16 bytes used for fp and lr.
|
||||
return (unsigned)Offset;
|
||||
}
|
||||
|
||||
bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
|
||||
if (!EnableRedZone)
|
||||
return false;
|
||||
// Don't use the red zone if the function explicitly asks us not to.
|
||||
// This is typically used for kernel code.
|
||||
if (MF.getFunction()->getAttributes().hasAttribute(
|
||||
AttributeSet::FunctionIndex, Attribute::NoRedZone))
|
||||
return false;
|
||||
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
|
||||
unsigned NumBytes = AFI->getLocalStackSize();
|
||||
|
||||
// Note: currently hasFP() is always true for hasCalls(), but that's an
|
||||
// implementation detail of the current code, not a strict requirement,
|
||||
// so stay safe here and check both.
|
||||
if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// hasFP - Return true if the specified function should have a dedicated frame
|
||||
/// pointer register.
|
||||
bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
#ifndef NDEBUG
|
||||
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
|
||||
assert(!RegInfo->needsStackRealignment(MF) &&
|
||||
"No stack realignment on ARM64!");
|
||||
#endif
|
||||
|
||||
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
|
||||
MFI->isFrameAddressTaken());
|
||||
}
|
||||
|
||||
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
|
||||
/// not required, we reserve argument space for call sites in the function
|
||||
/// immediately on entry to the current function. This eliminates the need for
|
||||
/// add/sub sp brackets around call sites. Returns true if the call frame is
|
||||
/// included as part of the stack frame.
|
||||
bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
||||
return !MF.getFrameInfo()->hasVarSizedObjects();
|
||||
}
|
||||
|
||||
void ARM64FrameLowering::eliminateCallFramePseudoInstr(
|
||||
MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const {
|
||||
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
|
||||
const ARM64InstrInfo *TII =
|
||||
static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
|
||||
if (!TFI->hasReservedCallFrame(MF)) {
|
||||
// If we have alloca, convert as follows:
|
||||
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
|
||||
// ADJCALLSTACKUP -> add, sp, sp, amount
|
||||
MachineInstr *Old = I;
|
||||
DebugLoc DL = Old->getDebugLoc();
|
||||
unsigned Amount = Old->getOperand(0).getImm();
|
||||
if (Amount != 0) {
|
||||
// We need to keep the stack aligned properly. To do this, we round the
|
||||
// amount of space needed for the outgoing arguments up to the next
|
||||
// alignment boundary.
|
||||
unsigned Align = TFI->getStackAlignment();
|
||||
Amount = (Amount + Align - 1) / Align * Align;
|
||||
|
||||
// Replace the pseudo instruction with a new instruction...
|
||||
unsigned Opc = Old->getOpcode();
|
||||
if (Opc == ARM64::ADJCALLSTACKDOWN) {
|
||||
emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII);
|
||||
} else {
|
||||
assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP");
|
||||
emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII);
|
||||
}
|
||||
}
|
||||
}
|
||||
MBB.erase(I);
|
||||
}
|
||||
|
||||
void
|
||||
ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
unsigned FramePtr) const {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
MachineModuleInfo &MMI = MF.getMMI();
|
||||
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
|
||||
const ARM64InstrInfo *TII = TM.getInstrInfo();
|
||||
DebugLoc DL = MBB.findDebugLoc(MBBI);
|
||||
|
||||
// Add callee saved registers to move list.
|
||||
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
|
||||
if (CSI.empty())
|
||||
return;
|
||||
|
||||
const DataLayout *TD = MF.getTarget().getDataLayout();
|
||||
bool HasFP = hasFP(MF);
|
||||
|
||||
// Calculate the number of bytes used for storing the return address.
|
||||
int stackGrowth = -TD->getPointerSize(0);
|
||||
|
||||
// Calculate offsets.
|
||||
int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
|
||||
unsigned TotalSkipped = 0;
|
||||
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
|
||||
E = CSI.end();
|
||||
I != E; ++I) {
|
||||
unsigned Reg = I->getReg();
|
||||
int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()) -
|
||||
getOffsetOfLocalArea() + saveAreaOffset;
|
||||
|
||||
// Don't output a new CFI directive if we're re-saving the frame pointer or
|
||||
// link register. This happens when the PrologEpilogInserter has inserted an
|
||||
// extra "STP" of the frame pointer and link register -- the "emitPrologue"
|
||||
// method automatically generates the directives when frame pointers are
|
||||
// used. If we generate CFI directives for the extra "STP"s, the linker will
|
||||
// lose track of the correct values for the frame pointer and link register.
|
||||
if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) {
|
||||
TotalSkipped += stackGrowth;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
|
||||
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
|
||||
nullptr, DwarfReg, Offset - TotalSkipped));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
}
|
||||
}
|
||||
|
||||
void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
const Function *Fn = MF.getFunction();
|
||||
const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo();
|
||||
const ARM64InstrInfo *TII = TM.getInstrInfo();
|
||||
MachineModuleInfo &MMI = MF.getMMI();
|
||||
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
|
||||
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
|
||||
bool HasFP = hasFP(MF);
|
||||
DebugLoc DL = MBB.findDebugLoc(MBBI);
|
||||
|
||||
int NumBytes = (int)MFI->getStackSize();
|
||||
if (!AFI->hasStackFrame()) {
|
||||
assert(!HasFP && "unexpected function without stack frame but with FP");
|
||||
|
||||
// All of the stack allocation is for locals.
|
||||
AFI->setLocalStackSize(NumBytes);
|
||||
|
||||
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
|
||||
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
|
||||
|
||||
// REDZONE: If the stack size is less than 128 bytes, we don't need
|
||||
// to actually allocate.
|
||||
if (NumBytes && !canUseRedZone(MF)) {
|
||||
emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
|
||||
MachineInstr::FrameSetup);
|
||||
|
||||
// Encode the stack size of the leaf function.
|
||||
unsigned CFIIndex = MMI.addFrameInst(
|
||||
MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
} else if (NumBytes) {
|
||||
++NumRedZoneFunctions;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Only set up FP if we actually need to.
|
||||
int FPOffset = 0;
|
||||
if (HasFP) {
|
||||
// First instruction must a) allocate the stack and b) have an immediate
|
||||
// that is a multiple of -2.
|
||||
assert((MBBI->getOpcode() == ARM64::STPXpre ||
|
||||
MBBI->getOpcode() == ARM64::STPDpre) &&
|
||||
MBBI->getOperand(2).getReg() == ARM64::SP &&
|
||||
MBBI->getOperand(3).getImm() < 0 &&
|
||||
(MBBI->getOperand(3).getImm() & 1) == 0);
|
||||
|
||||
// Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
|
||||
// required for the callee saved register area we get the frame pointer
|
||||
// by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
|
||||
FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8;
|
||||
assert(FPOffset >= 0 && "Bad Framepointer Offset");
|
||||
}
|
||||
|
||||
// Move past the saves of the callee-saved registers.
|
||||
while (MBBI->getOpcode() == ARM64::STPXi ||
|
||||
MBBI->getOpcode() == ARM64::STPDi ||
|
||||
MBBI->getOpcode() == ARM64::STPXpre ||
|
||||
MBBI->getOpcode() == ARM64::STPDpre) {
|
||||
++MBBI;
|
||||
NumBytes -= 16;
|
||||
}
|
||||
assert(NumBytes >= 0 && "Negative stack allocation size!?");
|
||||
if (HasFP) {
|
||||
// Issue sub fp, sp, FPOffset or
|
||||
// mov fp,sp when FPOffset is zero.
|
||||
// Note: All stores of callee-saved registers are marked as "FrameSetup".
|
||||
// This code marks the instruction(s) that set the FP also.
|
||||
emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII,
|
||||
MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
// All of the remaining stack allocations are for locals.
|
||||
AFI->setLocalStackSize(NumBytes);
|
||||
|
||||
// Allocate space for the rest of the frame.
|
||||
if (NumBytes) {
|
||||
// If we're a leaf function, try using the red zone.
|
||||
if (!canUseRedZone(MF))
|
||||
emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
|
||||
MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
// If we need a base pointer, set it up here. It's whatever the value of the
|
||||
// stack pointer is at this point. Any variable size objects will be allocated
|
||||
// after this, so we can still use the base pointer to reference locals.
|
||||
//
|
||||
// FIXME: Clarify FrameSetup flags here.
|
||||
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
|
||||
// needed.
|
||||
//
|
||||
if (RegInfo->hasBasePointer(MF))
|
||||
TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false);
|
||||
|
||||
if (needsFrameMoves) {
|
||||
const DataLayout *TD = MF.getTarget().getDataLayout();
|
||||
const int StackGrowth = -TD->getPointerSize(0);
|
||||
unsigned FramePtr = RegInfo->getFrameRegister(MF);
|
||||
|
||||
// An example of the prologue:
|
||||
//
|
||||
// .globl __foo
|
||||
// .align 2
|
||||
// __foo:
|
||||
// Ltmp0:
|
||||
// .cfi_startproc
|
||||
// .cfi_personality 155, ___gxx_personality_v0
|
||||
// Leh_func_begin:
|
||||
// .cfi_lsda 16, Lexception33
|
||||
//
|
||||
// stp xa,bx, [sp, -#offset]!
|
||||
// ...
|
||||
// stp x28, x27, [sp, #offset-32]
|
||||
// stp fp, lr, [sp, #offset-16]
|
||||
// add fp, sp, #offset - 16
|
||||
// sub sp, sp, #1360
|
||||
//
|
||||
// The Stack:
|
||||
// +-------------------------------------------+
|
||||
// 10000 | ........ | ........ | ........ | ........ |
|
||||
// 10004 | ........ | ........ | ........ | ........ |
|
||||
// +-------------------------------------------+
|
||||
// 10008 | ........ | ........ | ........ | ........ |
|
||||
// 1000c | ........ | ........ | ........ | ........ |
|
||||
// +===========================================+
|
||||
// 10010 | X28 Register |
|
||||
// 10014 | X28 Register |
|
||||
// +-------------------------------------------+
|
||||
// 10018 | X27 Register |
|
||||
// 1001c | X27 Register |
|
||||
// +===========================================+
|
||||
// 10020 | Frame Pointer |
|
||||
// 10024 | Frame Pointer |
|
||||
// +-------------------------------------------+
|
||||
// 10028 | Link Register |
|
||||
// 1002c | Link Register |
|
||||
// +===========================================+
|
||||
// 10030 | ........ | ........ | ........ | ........ |
|
||||
// 10034 | ........ | ........ | ........ | ........ |
|
||||
// +-------------------------------------------+
|
||||
// 10038 | ........ | ........ | ........ | ........ |
|
||||
// 1003c | ........ | ........ | ........ | ........ |
|
||||
// +-------------------------------------------+
|
||||
//
|
||||
// [sp] = 10030 :: >>initial value<<
|
||||
// sp = 10020 :: stp fp, lr, [sp, #-16]!
|
||||
// fp = sp == 10020 :: mov fp, sp
|
||||
// [sp] == 10020 :: stp x28, x27, [sp, #-16]!
|
||||
// sp == 10010 :: >>final value<<
|
||||
//
|
||||
// The frame pointer (w29) points to address 10020. If we use an offset of
|
||||
// '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
|
||||
// for w27, and -32 for w28:
|
||||
//
|
||||
// Ltmp1:
|
||||
// .cfi_def_cfa w29, 16
|
||||
// Ltmp2:
|
||||
// .cfi_offset w30, -8
|
||||
// Ltmp3:
|
||||
// .cfi_offset w29, -16
|
||||
// Ltmp4:
|
||||
// .cfi_offset w27, -24
|
||||
// Ltmp5:
|
||||
// .cfi_offset w28, -32
|
||||
|
||||
if (HasFP) {
|
||||
// Define the current CFA rule to use the provided FP.
|
||||
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
|
||||
unsigned CFIIndex = MMI.addFrameInst(
|
||||
MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
|
||||
// Record the location of the stored LR
|
||||
unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true);
|
||||
CFIIndex = MMI.addFrameInst(
|
||||
MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
|
||||
// Record the location of the stored FP
|
||||
CFIIndex = MMI.addFrameInst(
|
||||
MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
} else {
|
||||
// Encode the stack size of the leaf function.
|
||||
unsigned CFIIndex = MMI.addFrameInst(
|
||||
MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
}
|
||||
|
||||
// Now emit the moves for whatever callee saved regs we have.
|
||||
emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
|
||||
}
|
||||
}
|
||||
|
||||
static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
|
||||
for (unsigned i = 0; CSRegs[i]; ++i)
|
||||
if (Reg == CSRegs[i])
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
|
||||
if (MI->getOpcode() == ARM64::LDPXpost ||
|
||||
MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi ||
|
||||
MI->getOpcode() == ARM64::LDPDi) {
|
||||
if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) ||
|
||||
!isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) ||
|
||||
MI->getOperand(2).getReg() != ARM64::SP)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
|
||||
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
const ARM64InstrInfo *TII =
|
||||
static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
|
||||
const ARM64RegisterInfo *RegInfo =
|
||||
static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
|
||||
DebugLoc DL = MBBI->getDebugLoc();
|
||||
|
||||
unsigned NumBytes = MFI->getStackSize();
|
||||
unsigned NumRestores = 0;
|
||||
// Move past the restores of the callee-saved registers.
|
||||
MachineBasicBlock::iterator LastPopI = MBBI;
|
||||
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
|
||||
if (LastPopI != MBB.begin()) {
|
||||
do {
|
||||
++NumRestores;
|
||||
--LastPopI;
|
||||
} while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
|
||||
if (!isCSRestore(LastPopI, CSRegs)) {
|
||||
++LastPopI;
|
||||
--NumRestores;
|
||||
}
|
||||
}
|
||||
NumBytes -= NumRestores * 16;
|
||||
assert(NumBytes >= 0 && "Negative stack allocation size!?");
|
||||
|
||||
if (!hasFP(MF)) {
|
||||
// If this was a redzone leaf function, we don't need to restore the
|
||||
// stack pointer.
|
||||
if (!canUseRedZone(MF))
|
||||
emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII);
|
||||
return;
|
||||
}
|
||||
|
||||
// Restore the original stack pointer.
|
||||
// FIXME: Rather than doing the math here, we should instead just use
|
||||
// non-post-indexed loads for the restores if we aren't actually going to
|
||||
// be able to save any instructions.
|
||||
if (NumBytes || MFI->hasVarSizedObjects())
|
||||
emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP,
|
||||
-(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
|
||||
}
|
||||
|
||||
/// getFrameIndexOffset - Returns the displacement from the frame register to
|
||||
/// the stack frame of the specified index.
|
||||
int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
|
||||
int FI) const {
|
||||
unsigned FrameReg;
|
||||
return getFrameIndexReference(MF, FI, FrameReg);
|
||||
}
|
||||
|
||||
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
|
||||
/// debug info. It's the same as what we use for resolving the code-gen
|
||||
/// references for now. FIXME: This can go wrong when references are
|
||||
/// SP-relative and simple call frames aren't used.
|
||||
int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
||||
int FI,
|
||||
unsigned &FrameReg) const {
|
||||
return resolveFrameIndexReference(MF, FI, FrameReg);
|
||||
}
|
||||
|
||||
int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
|
||||
int FI, unsigned &FrameReg,
|
||||
bool PreferFP) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
const ARM64RegisterInfo *RegInfo =
|
||||
static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
|
||||
const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
|
||||
int FPOffset = MFI->getObjectOffset(FI) + 16;
|
||||
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
|
||||
bool isFixed = MFI->isFixedObjectIndex(FI);
|
||||
|
||||
// Use frame pointer to reference fixed objects. Use it for locals if
|
||||
// there are VLAs (and thus the SP isn't reliable as a base).
|
||||
// Make sure useFPForScavengingIndex() does the right thing for the emergency
|
||||
// spill slot.
|
||||
bool UseFP = false;
|
||||
if (AFI->hasStackFrame()) {
|
||||
// Note: Keeping the following as multiple 'if' statements rather than
|
||||
// merging to a single expression for readability.
|
||||
//
|
||||
// Argument access should always use the FP.
|
||||
if (isFixed) {
|
||||
UseFP = hasFP(MF);
|
||||
} else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
|
||||
// Use SP or FP, whichever gives us the best chance of the offset
|
||||
// being in range for direct access. If the FPOffset is positive,
|
||||
// that'll always be best, as the SP will be even further away.
|
||||
// If the FPOffset is negative, we have to keep in mind that the
|
||||
// available offset range for negative offsets is smaller than for
|
||||
// positive ones. If we have variable sized objects, we're stuck with
|
||||
// using the FP regardless, though, as the SP offset is unknown
|
||||
// and we don't have a base pointer available. If an offset is
|
||||
// available via the FP and the SP, use whichever is closest.
|
||||
if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
|
||||
(FPOffset >= -256 && Offset > -FPOffset))
|
||||
UseFP = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (UseFP) {
|
||||
FrameReg = RegInfo->getFrameRegister(MF);
|
||||
return FPOffset;
|
||||
}
|
||||
|
||||
// Use the base pointer if we have one.
|
||||
if (RegInfo->hasBasePointer(MF))
|
||||
FrameReg = RegInfo->getBaseRegister();
|
||||
else {
|
||||
FrameReg = ARM64::SP;
|
||||
// If we're using the red zone for this function, the SP won't actually
|
||||
// be adjusted, so the offsets will be negative. They're also all
|
||||
// within range of the signed 9-bit immediate instructions.
|
||||
if (canUseRedZone(MF))
|
||||
Offset -= AFI->getLocalStackSize();
|
||||
}
|
||||
|
||||
return Offset;
|
||||
}
|
||||
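A compressed model of the base-register choice made in resolveFrameIndexReference above; this is a sketch under the assumption that only the FP-vs-SP distance matters (the PreferFP, variable-sized-object, and base-pointer cases are handled separately in the real code, and the helper name here is hypothetical).

// FPOffset: object offset addressed from the frame pointer (object + 16).
// SPOffset: the same object addressed from the post-allocation SP.
// Prefer the FP when its offset is non-negative, or when it is within the
// +/-256 unscaled-immediate range and no farther away than the SP offset.
static int pickFrameBaseOffset(int FPOffset, int SPOffset, bool &UseFP) {
  UseFP = FPOffset >= 0 || (FPOffset >= -256 && SPOffset > -FPOffset);
  return UseFP ? FPOffset : SPOffset;
}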
|
||||
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
|
||||
if (Reg != ARM64::LR)
|
||||
return getKillRegState(true);
|
||||
|
||||
// LR may be referred to later by an @llvm.returnaddress intrinsic.
|
||||
bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR);
|
||||
bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
|
||||
return getKillRegState(LRKill);
|
||||
}
|
||||
|
||||
bool ARM64FrameLowering::spillCalleeSavedRegisters(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
const std::vector<CalleeSavedInfo> &CSI,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
|
||||
unsigned Count = CSI.size();
|
||||
DebugLoc DL;
|
||||
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
|
||||
|
||||
if (MI != MBB.end())
|
||||
DL = MI->getDebugLoc();
|
||||
|
||||
for (unsigned i = 0; i < Count; i += 2) {
|
||||
unsigned idx = Count - i - 2;
|
||||
unsigned Reg1 = CSI[idx].getReg();
|
||||
unsigned Reg2 = CSI[idx + 1].getReg();
|
||||
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
|
||||
// list to come in sorted by frame index so that we can issue the store
|
||||
// pair instructions directly. Assert if we see anything otherwise.
|
||||
//
|
||||
// The order of the registers in the list is controlled by
|
||||
// getCalleeSavedRegs(), so they will always be in-order, as well.
|
||||
assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
|
||||
"Out of order callee saved regs!");
|
||||
unsigned StrOpc;
|
||||
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
|
||||
assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
|
||||
// Issue sequence of non-sp increment and pi sp spills for cs regs. The
|
||||
// first spill is a pre-increment that allocates the stack.
|
||||
// For example:
|
||||
// stp x22, x21, [sp, #-48]! // addImm(-6)
|
||||
// stp x20, x19, [sp, #16] // addImm(+2)
|
||||
// stp fp, lr, [sp, #32] // addImm(+4)
|
||||
// Rationale: This sequence saves uop updates compared to a sequence of
|
||||
// pre-increment spills like stp xi,xj,[sp,#-16]!
|
||||
// Note: Similar rationale and sequence for restores in epilog.
|
||||
if (ARM64::GPR64RegClass.contains(Reg1)) {
|
||||
assert(ARM64::GPR64RegClass.contains(Reg2) &&
|
||||
"Expected GPR64 callee-saved register pair!");
|
||||
// For first spill use pre-increment store.
|
||||
if (i == 0)
|
||||
StrOpc = ARM64::STPXpre;
|
||||
else
|
||||
StrOpc = ARM64::STPXi;
|
||||
} else if (ARM64::FPR64RegClass.contains(Reg1)) {
|
||||
assert(ARM64::FPR64RegClass.contains(Reg2) &&
|
||||
"Expected FPR64 callee-saved register pair!");
|
||||
// For first spill use pre-increment store.
|
||||
if (i == 0)
|
||||
StrOpc = ARM64::STPDpre;
|
||||
else
|
||||
StrOpc = ARM64::STPDi;
|
||||
} else
|
||||
llvm_unreachable("Unexpected callee saved register!");
|
||||
DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
|
||||
<< TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
|
||||
<< ", " << CSI[idx + 1].getFrameIdx() << ")\n");
|
||||
// Compute offset: i = 0 => offset = -Count;
|
||||
// i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
|
||||
const int Offset = (i == 0) ? -Count : i;
|
||||
assert((Offset >= -64 && Offset <= 63) &&
|
||||
"Offset out of bounds for STP immediate");
|
||||
BuildMI(MBB, MI, DL, TII.get(StrOpc))
|
||||
.addReg(Reg2, getPrologueDeath(MF, Reg2))
|
||||
.addReg(Reg1, getPrologueDeath(MF, Reg1))
|
||||
.addReg(ARM64::SP)
|
||||
.addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
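The STP immediates used in the loop above follow a simple pattern: the first pair pre-decrements SP by the whole callee-save area, and every later pair stores at a positive offset into it. A small sketch (hypothetical helper; offsets are in 8-byte units, as in the STP encoding):

// For Count callee-saved registers (Count is even), pair index i = 0, 2, 4, ...
static int csrSpillOffset(unsigned i, unsigned Count) {
  return i == 0 ? -(int)Count   // stp ..., [sp, #-Count*8]!  (allocates the area)
                : (int)i;       // stp ..., [sp, #i*8]
}
// e.g. Count == 6 gives -6, 2, 4, matching the stp [sp,#-48]!/[sp,#16]/[sp,#32]
// example in the comment above.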
|
||||
bool ARM64FrameLowering::restoreCalleeSavedRegisters(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
const std::vector<CalleeSavedInfo> &CSI,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
|
||||
unsigned Count = CSI.size();
|
||||
DebugLoc DL;
|
||||
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
|
||||
|
||||
if (MI != MBB.end())
|
||||
DL = MI->getDebugLoc();
|
||||
|
||||
for (unsigned i = 0; i < Count; i += 2) {
|
||||
unsigned Reg1 = CSI[i].getReg();
|
||||
unsigned Reg2 = CSI[i + 1].getReg();
|
||||
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
|
||||
// list to come in sorted by frame index so that we can issue the store
|
||||
// pair instructions directly. Assert if we see anything otherwise.
|
||||
assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
|
||||
"Out of order callee saved regs!");
|
||||
// Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
|
||||
// the last load is sp-pi post-increment and de-allocates the stack:
|
||||
// For example:
|
||||
// ldp fp, lr, [sp, #32] // addImm(+4)
|
||||
// ldp x20, x19, [sp, #16] // addImm(+2)
|
||||
// ldp x22, x21, [sp], #48 // addImm(+6)
|
||||
// Note: see comment in spillCalleeSavedRegisters()
|
||||
unsigned LdrOpc;
|
||||
|
||||
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
|
||||
assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
|
||||
if (ARM64::GPR64RegClass.contains(Reg1)) {
|
||||
assert(ARM64::GPR64RegClass.contains(Reg2) &&
|
||||
"Expected GPR64 callee-saved register pair!");
|
||||
if (i == Count - 2)
|
||||
LdrOpc = ARM64::LDPXpost;
|
||||
else
|
||||
LdrOpc = ARM64::LDPXi;
|
||||
} else if (ARM64::FPR64RegClass.contains(Reg1)) {
|
||||
assert(ARM64::FPR64RegClass.contains(Reg2) &&
|
||||
"Expected FPR64 callee-saved register pair!");
|
||||
if (i == Count - 2)
|
||||
LdrOpc = ARM64::LDPDpost;
|
||||
else
|
||||
LdrOpc = ARM64::LDPDi;
|
||||
} else
|
||||
llvm_unreachable("Unexpected callee saved register!");
|
||||
DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
|
||||
<< TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
|
||||
<< ", " << CSI[i + 1].getFrameIdx() << ")\n");
|
||||
|
||||
// Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
|
||||
// etc.
|
||||
const int Offset = (i == Count - 2) ? Count : Count - i - 2;
|
||||
assert((Offset >= -64 && Offset <= 63) &&
|
||||
"Offset out of bounds for LDP immediate");
|
||||
BuildMI(MBB, MI, DL, TII.get(LdrOpc))
|
||||
.addReg(Reg2, getDefRegState(true))
|
||||
.addReg(Reg1, getDefRegState(true))
|
||||
.addReg(ARM64::SP)
|
||||
.addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
|
||||
// where the factor * 8 is implicit
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan(
|
||||
MachineFunction &MF, RegScavenger *RS) const {
|
||||
const ARM64RegisterInfo *RegInfo =
|
||||
static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
|
||||
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
|
||||
MachineRegisterInfo *MRI = &MF.getRegInfo();
|
||||
SmallVector<unsigned, 4> UnspilledCSGPRs;
|
||||
SmallVector<unsigned, 4> UnspilledCSFPRs;
|
||||
|
||||
// The frame record needs to be created by saving the appropriate registers
|
||||
if (hasFP(MF)) {
|
||||
MRI->setPhysRegUsed(ARM64::FP);
|
||||
MRI->setPhysRegUsed(ARM64::LR);
|
||||
}
|
||||
|
||||
// Spill the BasePtr if it's used. Do this first thing so that the
|
||||
// getCalleeSavedRegs() below will get the right answer.
|
||||
if (RegInfo->hasBasePointer(MF))
|
||||
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
|
||||
|
||||
// If any callee-saved registers are used, the frame cannot be eliminated.
|
||||
unsigned NumGPRSpilled = 0;
|
||||
unsigned NumFPRSpilled = 0;
|
||||
bool ExtraCSSpill = false;
|
||||
bool CanEliminateFrame = true;
|
||||
DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
|
||||
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
|
||||
|
||||
// Check pairs of consecutive callee-saved registers.
|
||||
for (unsigned i = 0; CSRegs[i]; i += 2) {
|
||||
assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
|
||||
|
||||
const unsigned OddReg = CSRegs[i];
|
||||
const unsigned EvenReg = CSRegs[i + 1];
|
||||
assert((ARM64::GPR64RegClass.contains(OddReg) &&
|
||||
ARM64::GPR64RegClass.contains(EvenReg)) ^
|
||||
(ARM64::FPR64RegClass.contains(OddReg) &&
|
||||
ARM64::FPR64RegClass.contains(EvenReg)) &&
|
||||
"Register class mismatch!");
|
||||
|
||||
const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
|
||||
const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
|
||||
|
||||
// Early exit if none of the registers in the register pair is actually
|
||||
// used.
|
||||
if (!OddRegUsed && !EvenRegUsed) {
|
||||
if (ARM64::GPR64RegClass.contains(OddReg)) {
|
||||
UnspilledCSGPRs.push_back(OddReg);
|
||||
UnspilledCSGPRs.push_back(EvenReg);
|
||||
} else {
|
||||
UnspilledCSFPRs.push_back(OddReg);
|
||||
UnspilledCSFPRs.push_back(EvenReg);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned Reg = ARM64::NoRegister;
|
||||
// If only one of the registers of the register pair is used, make sure to
|
||||
// mark the other one as used as well.
|
||||
if (OddRegUsed ^ EvenRegUsed) {
|
||||
// Find out which register is the additional spill.
|
||||
Reg = OddRegUsed ? EvenReg : OddReg;
|
||||
MRI->setPhysRegUsed(Reg);
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
|
||||
DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
|
||||
|
||||
assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) ||
|
||||
(RegInfo->getEncodingValue(OddReg) + 1 ==
|
||||
RegInfo->getEncodingValue(EvenReg))) &&
|
||||
"Register pair of non-adjacent registers!");
|
||||
if (ARM64::GPR64RegClass.contains(OddReg)) {
|
||||
NumGPRSpilled += 2;
|
||||
// If it's not a reserved register, we can use it in lieu of an
|
||||
// emergency spill slot for the register scavenger.
|
||||
// FIXME: It would be better to instead keep looking and choose another
|
||||
// unspilled register that isn't reserved, if there is one.
|
||||
if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
|
||||
ExtraCSSpill = true;
|
||||
} else
|
||||
NumFPRSpilled += 2;
|
||||
|
||||
CanEliminateFrame = false;
|
||||
}
|
||||
|
||||
// FIXME: Set BigStack if any stack slot references may be out of range.
|
||||
// For now, just conservatively guestimate based on unscaled indexing
|
||||
// range. We'll end up allocating an unnecessary spill slot a lot, but
|
||||
// realistically that's not a big deal at this stage of the game.
|
||||
// The CSR spill slots have not been allocated yet, so estimateStackSize
|
||||
// won't include them.
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
|
||||
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
|
||||
bool BigStack = (CFSize >= 256);
|
||||
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
|
||||
AFI->setHasStackFrame(true);
|
||||
|
||||
// Estimate if we might need to scavenge a register at some point in order
|
||||
// to materialize a stack offset. If so, either spill one additional
|
||||
// callee-saved register or reserve a special spill slot to facilitate
|
||||
// register scavenging. If we already spilled an extra callee-saved register
|
||||
// above to keep the number of spills even, we don't need to do anything else
|
||||
// here.
|
||||
if (BigStack && !ExtraCSSpill) {
|
||||
|
||||
// If we're adding a register to spill here, we have to add two of them
|
||||
// to keep the number of regs to spill even.
|
||||
assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
|
||||
unsigned Count = 0;
|
||||
while (!UnspilledCSGPRs.empty() && Count < 2) {
|
||||
unsigned Reg = UnspilledCSGPRs.back();
|
||||
UnspilledCSGPRs.pop_back();
|
||||
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
|
||||
<< " to get a scratch register.\n");
|
||||
MRI->setPhysRegUsed(Reg);
|
||||
ExtraCSSpill = true;
|
||||
++Count;
|
||||
}
|
||||
|
||||
// If we didn't find an extra callee-saved register to spill, create
|
||||
// an emergency spill slot.
|
||||
if (!ExtraCSSpill) {
|
||||
const TargetRegisterClass *RC = &ARM64::GPR64RegClass;
|
||||
int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
|
||||
RS->addScavengingFrameIndex(FI);
|
||||
DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
|
||||
<< " as the emergency spill slot.\n");
|
||||
}
|
||||
}
|
||||
}
|
75
lib/Target/ARM64/ARM64FrameLowering.h
Normal file
@ -0,0 +1,75 @@
|
||||
//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64_FRAMELOWERING_H
|
||||
#define ARM64_FRAMELOWERING_H
|
||||
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class ARM64Subtarget;
|
||||
class ARM64TargetMachine;
|
||||
|
||||
class ARM64FrameLowering : public TargetFrameLowering {
|
||||
const ARM64TargetMachine &TM;
|
||||
|
||||
public:
|
||||
explicit ARM64FrameLowering(const ARM64TargetMachine &TM,
|
||||
const ARM64Subtarget &STI)
|
||||
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
|
||||
false /*StackRealignable*/),
|
||||
TM(TM) {}
|
||||
|
||||
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
unsigned FramePtr) const;
|
||||
|
||||
void eliminateCallFramePseudoInstr(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const;
|
||||
|
||||
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
|
||||
/// the function.
|
||||
void emitPrologue(MachineFunction &MF) const;
|
||||
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
|
||||
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
|
||||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const;
|
||||
int resolveFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg,
|
||||
bool PreferFP = false) const;
|
||||
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
const std::vector<CalleeSavedInfo> &CSI,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
const std::vector<CalleeSavedInfo> &CSI,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
/// \brief Can this function use the red zone for local allocations?
|
||||
bool canUseRedZone(const MachineFunction &MF) const;
|
||||
|
||||
bool hasFP(const MachineFunction &MF) const;
|
||||
bool hasReservedCallFrame(const MachineFunction &MF) const;
|
||||
|
||||
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
||||
RegScavenger *RS) const;
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
2395
lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
Normal file
File diff suppressed because it is too large
7587
lib/Target/ARM64/ARM64ISelLowering.cpp
Normal file
File diff suppressed because it is too large
423
lib/Target/ARM64/ARM64ISelLowering.h
Normal file
@ -0,0 +1,423 @@
|
||||
//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the interfaces that ARM64 uses to lower LLVM code into a
|
||||
// selection DAG.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H
|
||||
#define LLVM_TARGET_ARM64_ISELLOWERING_H
|
||||
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace ARM64ISD {
|
||||
|
||||
enum {
|
||||
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
||||
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
|
||||
CALL, // Function call.
|
||||
|
||||
// Almost the same as a normal call node, except that a TLSDesc relocation is
|
||||
// needed so the linker can relax it correctly if possible.
|
||||
TLSDESC_CALL,
|
||||
ADRP, // Page address of a TargetGlobalAddress operand.
|
||||
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
|
||||
LOADgot, // Load from automatically generated descriptor (e.g. Global
|
||||
// Offset Table, TLS record).
|
||||
RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
|
||||
BRCOND, // Conditional branch instruction; "b.cond".
|
||||
CSEL,
|
||||
FCSEL, // Conditional move instruction.
|
||||
CSINV, // Conditional select invert.
|
||||
CSNEG, // Conditional select negate.
|
||||
CSINC, // Conditional select increment.
|
||||
|
||||
// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
|
||||
// ELF.
|
||||
THREAD_POINTER,
|
||||
ADC,
|
||||
SBC, // adc, sbc instructions
|
||||
|
||||
// Arithmetic instructions which write flags.
|
||||
ADDS,
|
||||
SUBS,
|
||||
ADCS,
|
||||
SBCS,
|
||||
ANDS,
|
||||
|
||||
// Floating point comparison
|
||||
FCMP,
|
||||
|
||||
// Floating point max and min instructions.
|
||||
FMAX,
|
||||
FMIN,
|
||||
|
||||
// Scalar extract
|
||||
EXTR,
|
||||
|
||||
// Scalar-to-vector duplication
|
||||
DUP,
|
||||
DUPLANE8,
|
||||
DUPLANE16,
|
||||
DUPLANE32,
|
||||
DUPLANE64,
|
||||
|
||||
// Vector immediate moves
|
||||
MOVI,
|
||||
MOVIshift,
|
||||
MOVIedit,
|
||||
MOVImsl,
|
||||
FMOV,
|
||||
MVNIshift,
|
||||
MVNImsl,
|
||||
|
||||
// Vector immediate ops
|
||||
BICi,
|
||||
ORRi,
|
||||
|
||||
// Vector arithmetic negation
|
||||
NEG,
|
||||
|
||||
// Vector shuffles
|
||||
ZIP1,
|
||||
ZIP2,
|
||||
UZP1,
|
||||
UZP2,
|
||||
TRN1,
|
||||
TRN2,
|
||||
REV16,
|
||||
REV32,
|
||||
REV64,
|
||||
EXT,
|
||||
|
||||
// Vector shift by scalar
|
||||
VSHL,
|
||||
VLSHR,
|
||||
VASHR,
|
||||
|
||||
// Vector shift by scalar (again)
|
||||
SQSHL_I,
|
||||
UQSHL_I,
|
||||
SQSHLU_I,
|
||||
SRSHR_I,
|
||||
URSHR_I,
|
||||
|
||||
// Vector comparisons
|
||||
CMEQ,
|
||||
CMGE,
|
||||
CMGT,
|
||||
CMHI,
|
||||
CMHS,
|
||||
FCMEQ,
|
||||
FCMGE,
|
||||
FCMGT,
|
||||
|
||||
// Vector zero comparisons
|
||||
CMEQz,
|
||||
CMGEz,
|
||||
CMGTz,
|
||||
CMLEz,
|
||||
CMLTz,
|
||||
FCMEQz,
|
||||
FCMGEz,
|
||||
FCMGTz,
|
||||
FCMLEz,
|
||||
FCMLTz,
|
||||
|
||||
// Vector bitwise negation
|
||||
NOT,
|
||||
|
||||
// Vector bitwise selection
|
||||
BIT,
|
||||
|
||||
// Compare-and-branch
|
||||
CBZ,
|
||||
CBNZ,
|
||||
TBZ,
|
||||
TBNZ,
|
||||
|
||||
// Tail calls
|
||||
TC_RETURN,
|
||||
|
||||
// Custom prefetch handling
|
||||
PREFETCH,
|
||||
|
||||
// {s|u}int to FP within a FP register.
|
||||
SITOF,
|
||||
UITOF
|
||||
};
|
||||
|
||||
} // end namespace ARM64ISD
|
||||
|
||||
class ARM64Subtarget;
|
||||
class ARM64TargetMachine;
|
||||
|
||||
class ARM64TargetLowering : public TargetLowering {
|
||||
bool RequireStrictAlign;
|
||||
|
||||
public:
|
||||
explicit ARM64TargetLowering(ARM64TargetMachine &TM);
|
||||
|
||||
/// Selects the correct CCAssignFn for the given CallingConvention
|
||||
/// value.
|
||||
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
|
||||
|
||||
/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
|
||||
/// Mask are known to be either zero or one and return them in the
|
||||
/// KnownZero/KnownOne bitsets.
|
||||
void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero,
|
||||
APInt &KnownOne, const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
|
||||
|
||||
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
|
||||
/// unaligned memory accesses of the specified type.
|
||||
virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
|
||||
bool *Fast = 0) const {
|
||||
if (RequireStrictAlign)
|
||||
return false;
|
||||
// FIXME: True for Cyclone, but not necessarily for other targets.
|
||||
if (Fast)
|
||||
*Fast = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// LowerOperation - Provide custom lowering hooks for some operations.
|
||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
virtual const char *getTargetNodeName(unsigned Opcode) const;
|
||||
|
||||
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
/// getFunctionAlignment - Return the Log2 alignment of this function.
|
||||
virtual unsigned getFunctionAlignment(const Function *F) const;
|
||||
|
||||
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
|
||||
/// be used for loads / stores from the global.
|
||||
virtual unsigned getMaximalGlobalOffset() const;
|
||||
|
||||
/// Returns true if a cast between SrcAS and DestAS is a noop.
|
||||
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
|
||||
// Addrspacecasts are always noops.
|
||||
return true;
|
||||
}
|
||||
|
||||
/// createFastISel - This method returns a target specific FastISel object,
|
||||
/// or null if the target does not support "fast" ISel.
|
||||
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
||||
const TargetLibraryInfo *libInfo) const;
|
||||
|
||||
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
|
||||
|
||||
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
|
||||
|
||||
/// isShuffleMaskLegal - Return true if the given shuffle mask can be
|
||||
/// codegen'd directly, or if it should be stack expanded.
|
||||
virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
|
||||
|
||||
/// getSetCCResultType - Return the ISD::SETCC ValueType
|
||||
virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
|
||||
|
||||
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
|
||||
unsigned Size, unsigned BinOpcode) const;
|
||||
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
|
||||
unsigned Size) const;
|
||||
MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI,
|
||||
MachineBasicBlock *BB,
|
||||
unsigned BinOpcodeLo,
|
||||
unsigned BinOpcodeHi) const;
|
||||
MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI,
|
||||
MachineBasicBlock *BB,
|
||||
unsigned CondCode) const;
|
||||
MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
|
||||
virtual MachineBasicBlock *
|
||||
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
|
||||
|
||||
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
|
||||
unsigned Intrinsic) const;
|
||||
|
||||
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
|
||||
virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
|
||||
|
||||
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
|
||||
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
|
||||
virtual bool isZExtFree(SDValue Val, EVT VT2) const;
|
||||
|
||||
virtual bool hasPairedLoad(Type *LoadedType,
|
||||
unsigned &RequiredAligment) const;
|
||||
virtual bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const;
|
||||
|
||||
virtual bool isLegalAddImmediate(int64_t) const;
|
||||
virtual bool isLegalICmpImmediate(int64_t) const;
|
||||
|
||||
virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
|
||||
unsigned SrcAlign, bool IsMemset,
|
||||
bool ZeroMemset, bool MemcpyStrSrc,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
/// isLegalAddressingMode - Return true if the addressing mode represented
|
||||
/// by AM is legal for this target, for a load/store of the specified type.
|
||||
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
|
||||
|
||||
/// \brief Return the cost of the scaling factor used in the addressing
|
||||
/// mode represented by AM for this target, for a load/store
|
||||
/// of the specified type.
|
||||
/// If the AM is supported, the return value must be >= 0.
|
||||
/// If the AM is not supported, it returns a negative value.
|
||||
virtual int getScalingFactorCost(const AddrMode &AM, Type *Ty) const;
|
||||
|
||||
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
|
||||
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
|
||||
/// expanded to FMAs when this method returns true, otherwise fmuladd is
|
||||
/// expanded to fmul + fadd.
|
||||
virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
|
||||
|
||||
virtual const uint16_t *getScratchRegisters(CallingConv::ID CC) const;
|
||||
|
||||
virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
|
||||
Type *Ty) const;
|
||||
|
||||
private:
|
||||
/// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
|
||||
/// make the right decision when generating code for different targets.
|
||||
const ARM64Subtarget *Subtarget;
|
||||
|
||||
void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT);
|
||||
void addDRTypeForNEON(MVT VT);
|
||||
void addQRTypeForNEON(MVT VT);
|
||||
|
||||
virtual SDValue
|
||||
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
|
||||
SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual SDValue LowerCall(CallLoweringInfo & /*CLI*/,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
|
||||
CallingConv::ID CallConv, bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
|
||||
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
|
||||
bool isThisReturn, SDValue ThisVal) const;
|
||||
|
||||
bool isEligibleForTailCallOptimization(
|
||||
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
|
||||
bool isCalleeStructRet, bool isCallerStructRet,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
|
||||
|
||||
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
|
||||
SDValue &Chain) const;
|
||||
|
||||
virtual bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
LLVMContext &Context) const;
|
||||
|
||||
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
|
||||
RTLIB::Libcall Call) const;
|
||||
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
ConstraintType getConstraintType(const std::string &Constraint) const;
|
||||
|
||||
/// Examine constraint string and operand type and determine a weight value.
|
||||
/// The operand object must already have been set up with the operand type.
|
||||
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info,
|
||||
const char *constraint) const;
|
||||
|
||||
std::pair<unsigned, const TargetRegisterClass *>
|
||||
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
|
||||
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
|
||||
std::vector<SDValue> &Ops,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
|
||||
bool mayBeEmittedAsTailCall(CallInst *CI) const;
|
||||
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
|
||||
ISD::MemIndexedMode &AM, bool &IsInc,
|
||||
SelectionDAG &DAG) const;
|
||||
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
|
||||
ISD::MemIndexedMode &AM,
|
||||
SelectionDAG &DAG) const;
|
||||
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
|
||||
SDValue &Offset, ISD::MemIndexedMode &AM,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) const;
|
||||
};
|
||||
|
||||
namespace ARM64 {
|
||||
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
||||
const TargetLibraryInfo *libInfo);
|
||||
} // end namespace ARM64
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TARGET_ARM64_ISELLOWERING_H
|
293	lib/Target/ARM64/ARM64InstrAtomics.td	Normal file
@ -0,0 +1,293 @@
|
||||
//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// ARM64 Atomic operand code-gen constructs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic fences
|
||||
//===----------------------------------
|
||||
def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
|
||||
def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
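For illustration (my assumption, not something stated in this patch), these two patterns split source-level fences by ordering: an acquire-only fence (the ordering value 4 matched by the first pattern) can use the load-only barrier encoding (0x9), while every other ordering falls through to the full-barrier encoding (0xb). A minimal C++ sketch of the fences involved:

    #include <atomic>

    void acquire_fence() {
      // Expected to match the first pattern above (load-only barrier).
      std::atomic_thread_fence(std::memory_order_acquire);
    }

    void full_fence() {
      // Any other ordering falls through to the catch-all pattern (full barrier).
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }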
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic loads
|
||||
//===----------------------------------
|
||||
|
||||
// When they're actually atomic, only one addressing mode (GPR64sp) is
|
||||
// supported, but when they're relaxed and anything can be used, all the
|
||||
// standard modes would be valid and may give efficiency gains.
|
||||
|
||||
// An atomic load operation that actually needs acquire semantics.
|
||||
class acquiring_load<PatFrag base>
|
||||
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
|
||||
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
|
||||
assert(Ordering != AcquireRelease && "unexpected load ordering");
|
||||
return Ordering == Acquire || Ordering == SequentiallyConsistent;
|
||||
}]>;
|
||||
|
||||
// An atomic load operation that does not need either acquire or release
|
||||
// semantics.
|
||||
class relaxed_load<PatFrag base>
|
||||
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
|
||||
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
|
||||
return Ordering == Monotonic || Ordering == Unordered;
|
||||
}]>;
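As a rough sketch (an assumption for illustration, not part of the patch), these two fragments distinguish the following kinds of C++ loads: acquire or seq_cst loads match acquiring_load and select the LDAR* patterns below, while relaxed or unordered loads match relaxed_load and are free to use the ordinary addressing modes:

    #include <atomic>

    int load_acquire(const std::atomic<int> &x) {
      return x.load(std::memory_order_acquire);  // acquiring_load -> LDAR form
    }

    int load_relaxed(const std::atomic<int> &x) {
      return x.load(std::memory_order_relaxed);  // relaxed_load -> plain LDR forms
    }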
|
||||
|
||||
// 8-bit loads
|
||||
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_8> ro_indexed8:$addr),
|
||||
(LDRBBro ro_indexed8:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_8> am_indexed8:$addr),
|
||||
(LDRBBui am_indexed8:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_8> am_unscaled8:$addr),
|
||||
(LDURBBi am_unscaled8:$addr)>;
|
||||
|
||||
// 16-bit loads
|
||||
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_16> ro_indexed16:$addr),
|
||||
(LDRHHro ro_indexed16:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_16> am_indexed16:$addr),
|
||||
(LDRHHui am_indexed16:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_16> am_unscaled16:$addr),
|
||||
(LDURHHi am_unscaled16:$addr)>;
|
||||
|
||||
// 32-bit loads
|
||||
def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_32> ro_indexed32:$addr),
|
||||
(LDRWro ro_indexed32:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_32> am_indexed32:$addr),
|
||||
(LDRWui am_indexed32:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_32> am_unscaled32:$addr),
|
||||
(LDURWi am_unscaled32:$addr)>;
|
||||
|
||||
// 64-bit loads
|
||||
def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_64> ro_indexed64:$addr),
|
||||
(LDRXro ro_indexed64:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_64> am_indexed64:$addr),
|
||||
(LDRXui am_indexed64:$addr)>;
|
||||
def : Pat<(relaxed_load<atomic_load_64> am_unscaled64:$addr),
|
||||
(LDURXi am_unscaled64:$addr)>;
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic stores
|
||||
//===----------------------------------
|
||||
|
||||
// When they're actually atomic, only one addressing mode (GPR64sp) is
|
||||
// supported, but when they're relaxed and anything can be used, all the
|
||||
// standard modes would be valid and may give efficiency gains.
|
||||
|
||||
// A store operation that actually needs release semantics.
|
||||
class releasing_store<PatFrag base>
|
||||
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
|
||||
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
|
||||
assert(Ordering != AcquireRelease && "unexpected store ordering");
|
||||
return Ordering == Release || Ordering == SequentiallyConsistent;
|
||||
}]>;
|
||||
|
||||
// An atomic store operation that doesn't actually need to be atomic on ARM64.
|
||||
class relaxed_store<PatFrag base>
|
||||
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
|
||||
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
|
||||
return Ordering == Monotonic || Ordering == Unordered;
|
||||
}]>;
|
||||
|
||||
// 8-bit stores
|
||||
def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
|
||||
(STLRB GPR32:$val, GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_8> ro_indexed8:$ptr, GPR32:$val),
|
||||
(STRBBro GPR32:$val, ro_indexed8:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_8> am_indexed8:$ptr, GPR32:$val),
|
||||
(STRBBui GPR32:$val, am_indexed8:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_8> am_unscaled8:$ptr, GPR32:$val),
|
||||
(STURBBi GPR32:$val, am_unscaled8:$ptr)>;
|
||||
|
||||
// 16-bit stores
|
||||
def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
|
||||
(STLRH GPR32:$val, GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_16> ro_indexed16:$ptr, GPR32:$val),
|
||||
(STRHHro GPR32:$val, ro_indexed16:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_16> am_indexed16:$ptr, GPR32:$val),
|
||||
(STRHHui GPR32:$val, am_indexed16:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_16> am_unscaled16:$ptr, GPR32:$val),
|
||||
(STURHHi GPR32:$val, am_unscaled16:$ptr)>;
|
||||
|
||||
// 32-bit stores
|
||||
def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
|
||||
(STLRW GPR32:$val, GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32> ro_indexed32:$ptr, GPR32:$val),
|
||||
(STRWro GPR32:$val, ro_indexed32:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32> am_indexed32:$ptr, GPR32:$val),
|
||||
(STRWui GPR32:$val, am_indexed32:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_32> am_unscaled32:$ptr, GPR32:$val),
|
||||
(STURWi GPR32:$val, am_unscaled32:$ptr)>;
|
||||
|
||||
// 64-bit stores
|
||||
def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
|
||||
(STLRX GPR64:$val, GPR64sp:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64> ro_indexed64:$ptr, GPR64:$val),
|
||||
(STRXro GPR64:$val, ro_indexed64:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val),
|
||||
(STRXui GPR64:$val, am_indexed64:$ptr)>;
|
||||
def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val),
|
||||
(STURXi GPR64:$val, am_unscaled64:$ptr)>;
|
||||
|
||||
//===----------------------------------
|
||||
// Atomic read-modify-write operations
|
||||
//===----------------------------------
|
||||
|
||||
// More complicated operations need lots of C++ support, so we just create
|
||||
// skeletons here for the C++ code to refer to.
|
||||
|
||||
let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
|
||||
multiclass AtomicSizes {
|
||||
def _I8 : Pseudo<(outs GPR32:$dst),
|
||||
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
|
||||
def _I16 : Pseudo<(outs GPR32:$dst),
|
||||
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
|
||||
def _I32 : Pseudo<(outs GPR32:$dst),
|
||||
(ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
|
||||
def _I64 : Pseudo<(outs GPR64:$dst),
|
||||
(ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
|
||||
def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
|
||||
(ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi,
|
||||
i32imm:$ordering), []>;
|
||||
}
|
||||
}
|
||||
|
||||
defm ATOMIC_LOAD_ADD : AtomicSizes;
|
||||
defm ATOMIC_LOAD_SUB : AtomicSizes;
|
||||
defm ATOMIC_LOAD_AND : AtomicSizes;
|
||||
defm ATOMIC_LOAD_OR : AtomicSizes;
|
||||
defm ATOMIC_LOAD_XOR : AtomicSizes;
|
||||
defm ATOMIC_LOAD_NAND : AtomicSizes;
|
||||
defm ATOMIC_SWAP : AtomicSizes;
|
||||
let Defs = [CPSR] in {
|
||||
// These operations need a CMP to calculate the correct value
|
||||
defm ATOMIC_LOAD_MIN : AtomicSizes;
|
||||
defm ATOMIC_LOAD_MAX : AtomicSizes;
|
||||
defm ATOMIC_LOAD_UMIN : AtomicSizes;
|
||||
defm ATOMIC_LOAD_UMAX : AtomicSizes;
|
||||
}
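For context (a sketch based on my reading, not text from the patch), these pseudos stand in for source-level read-modify-write operations; since usesCustomInserter is set, each one is presumably expanded later by C++ code into a load-exclusive / store-exclusive retry loop. The kind of operation each family models:

    #include <atomic>

    int  add_one(std::atomic<int> &x)  { return x.fetch_add(1); }   // ATOMIC_LOAD_ADD_I32
    long swap42(std::atomic<long> &x)  { return x.exchange(42); }   // ATOMIC_SWAP_I64
    int  and_mask(std::atomic<int> &x) { return x.fetch_and(0xf); } // ATOMIC_LOAD_AND_I32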
|
||||
|
||||
class AtomicCmpSwap<RegisterClass GPRData>
|
||||
: Pseudo<(outs GPRData:$dst),
|
||||
(ins GPR64sp:$ptr, GPRData:$old, GPRData:$new,
|
||||
i32imm:$ordering), []> {
|
||||
let usesCustomInserter = 1;
|
||||
let hasCtrlDep = 1;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
let Defs = [CPSR];
|
||||
}
|
||||
|
||||
def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
|
||||
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
|
||||
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
|
||||
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
|
||||
|
||||
def ATOMIC_CMP_SWAP_I128
|
||||
: Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
|
||||
(ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi,
|
||||
GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> {
|
||||
let usesCustomInserter = 1;
|
||||
let hasCtrlDep = 1;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
let Defs = [CPSR];
|
||||
}
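Similarly (illustrative assumption), the ATOMIC_CMP_SWAP_* pseudos model compare-and-exchange; a minimal C++ counterpart:

    #include <atomic>

    bool try_update(std::atomic<long> &x, long expected, long desired) {
      // Expected to select ATOMIC_CMP_SWAP_I64 and be expanded by the custom inserter.
      return x.compare_exchange_strong(expected, desired);
    }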
|
||||
|
||||
//===----------------------------------
|
||||
// Low-level exclusive operations
|
||||
//===----------------------------------
|
||||
|
||||
// Load-exclusives.
|
||||
|
||||
def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
|
||||
def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
|
||||
def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
|
||||
def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
|
||||
def : Pat<(ldxr_1 am_noindex:$addr),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
|
||||
def : Pat<(ldxr_2 am_noindex:$addr),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
|
||||
def : Pat<(ldxr_4 am_noindex:$addr),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
|
||||
def : Pat<(ldxr_8 am_noindex:$addr), (LDXRX am_noindex:$addr)>;
|
||||
|
||||
def : Pat<(and (ldxr_1 am_noindex:$addr), 0xff),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
|
||||
def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
|
||||
def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff),
|
||||
(SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
|
||||
|
||||
// Store-exclusives.
|
||||
|
||||
def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_arm64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||
}]>;
|
||||
|
||||
def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_arm64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
|
||||
def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_arm64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
|
||||
def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(int_arm64_stxr node:$val, node:$ptr), [{
|
||||
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
|
||||
}]>;
|
||||
|
||||
def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr),
|
||||
(STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
|
||||
def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr),
|
||||
(STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
|
||||
def : Pat<(stxr_4 GPR64:$val, am_noindex:$addr),
|
||||
(STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
|
||||
def : Pat<(stxr_8 GPR64:$val, am_noindex:$addr),
|
||||
(STXRX GPR64:$val, am_noindex:$addr)>;
|
||||
|
||||
def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr),
|
||||
(STXRB GPR32:$val, am_noindex:$addr)>;
|
||||
def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr),
|
||||
(STXRH GPR32:$val, am_noindex:$addr)>;
|
||||
def : Pat<(stxr_4 (zext GPR32:$val), am_noindex:$addr),
|
||||
(STXRW GPR32:$val, am_noindex:$addr)>;
|
||||
|
||||
def : Pat<(stxr_1 (and GPR64:$val, 0xff), am_noindex:$addr),
|
||||
(STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
|
||||
def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr),
|
||||
(STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
|
||||
def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr),
|
||||
(STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
|
||||
|
||||
|
||||
// And clear exclusive.
|
||||
|
||||
def : Pat<(int_arm64_clrex), (CLREX 0xf)>;
|
8199	lib/Target/ARM64/ARM64InstrFormats.td	Normal file	(file diff suppressed because it is too large)
1864	lib/Target/ARM64/ARM64InstrInfo.cpp	Normal file	(file diff suppressed because it is too large)
223	lib/Target/ARM64/ARM64InstrInfo.h	Normal file
@ -0,0 +1,223 @@
|
||||
//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the ARM64 implementation of the TargetInstrInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_ARM64INSTRINFO_H
|
||||
#define LLVM_TARGET_ARM64INSTRINFO_H
|
||||
|
||||
#include "ARM64.h"
|
||||
#include "ARM64RegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
#define GET_INSTRINFO_HEADER
|
||||
#include "ARM64GenInstrInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class ARM64Subtarget;
|
||||
class ARM64TargetMachine;
|
||||
|
||||
class ARM64InstrInfo : public ARM64GenInstrInfo {
|
||||
// Reserve bits in the MachineMemOperand target hint flags, starting at 1.
|
||||
// They will be shifted into MOTargetHintStart when accessed.
|
||||
enum TargetMemOperandFlags {
|
||||
MOSuppressPair = 1
|
||||
};
|
||||
|
||||
const ARM64RegisterInfo RI;
|
||||
const ARM64Subtarget &Subtarget;
|
||||
|
||||
public:
|
||||
explicit ARM64InstrInfo(const ARM64Subtarget &STI);
|
||||
|
||||
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
|
||||
/// such, whenever a client has an instance of instruction info, it should
|
||||
/// always be able to get register info as well (through this method).
|
||||
virtual const ARM64RegisterInfo &getRegisterInfo() const { return RI; }
|
||||
|
||||
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
|
||||
|
||||
virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DstReg, unsigned &SubIdx) const;
|
||||
|
||||
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const;
|
||||
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const;
|
||||
|
||||
/// \brief Does this instruction set its full destination register to zero?
|
||||
bool isGPRZero(const MachineInstr *MI) const;
|
||||
|
||||
/// \brief Does this instruction rename a GPR without modifying bits?
|
||||
bool isGPRCopy(const MachineInstr *MI) const;
|
||||
|
||||
/// \brief Does this instruction rename an FPR without modifying bits?
|
||||
bool isFPRCopy(const MachineInstr *MI) const;
|
||||
|
||||
/// Return true if this load/store scales or extends its register offset.
|
||||
/// This refers to scaling a dynamic index as opposed to scaled immediates.
|
||||
/// MI should be a memory op that allows scaled addressing.
|
||||
bool isScaledAddr(const MachineInstr *MI) const;
|
||||
|
||||
/// Return true if pairing the given load or store is hinted to be
|
||||
/// unprofitable.
|
||||
bool isLdStPairSuppressed(const MachineInstr *MI) const;
|
||||
|
||||
/// Hint that pairing the given load or store is unprofitable.
|
||||
void suppressLdStPair(MachineInstr *MI) const;
|
||||
|
||||
virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
|
||||
unsigned &Offset,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
virtual bool enableClusterLoads() const { return true; }
|
||||
|
||||
virtual bool shouldClusterLoads(MachineInstr *FirstLdSt,
|
||||
MachineInstr *SecondLdSt,
|
||||
unsigned NumLoads) const;
|
||||
|
||||
virtual bool shouldScheduleAdjacent(MachineInstr *First,
|
||||
MachineInstr *Second) const;
|
||||
|
||||
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
|
||||
uint64_t Offset, const MDNode *MDPtr,
|
||||
DebugLoc DL) const;
|
||||
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc, unsigned Opcode,
|
||||
llvm::ArrayRef<unsigned> Indices) const;
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const;
|
||||
|
||||
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
unsigned SrcReg, bool isKill, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
unsigned DestReg, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
virtual MachineInstr *
|
||||
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
int FrameIndex) const;
|
||||
|
||||
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify = false) const;
|
||||
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
|
||||
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const;
|
||||
virtual bool
|
||||
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
|
||||
virtual bool canInsertSelect(const MachineBasicBlock &,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
unsigned, unsigned, int &, int &, int &) const;
|
||||
virtual void insertSelect(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DstReg,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
unsigned TrueReg, unsigned FalseReg) const;
|
||||
virtual void getNoopForMachoTarget(MCInst &NopInst) const;
|
||||
|
||||
/// analyzeCompare - For a comparison instruction, return the source registers
|
||||
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
|
||||
/// Return true if the comparison instruction can be analyzed.
|
||||
virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
|
||||
unsigned &SrcReg2, int &CmpMask,
|
||||
int &CmpValue) const;
|
||||
/// optimizeCompareInstr - Convert the instruction supplying the argument to
|
||||
/// the comparison into one that sets the zero bit in the flags register.
|
||||
virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
|
||||
unsigned SrcReg2, int CmpMask, int CmpValue,
|
||||
const MachineRegisterInfo *MRI) const;
|
||||
|
||||
private:
|
||||
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
|
||||
MachineBasicBlock *TBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond) const;
|
||||
};
|
||||
|
||||
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
|
||||
/// plus Offset. This is intended to be used from within the prolog/epilog
|
||||
/// insertion (PEI) pass, where a virtual scratch register may be allocated
|
||||
/// if necessary, to be replaced by the scavenger at the end of PEI.
|
||||
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
|
||||
const ARM64InstrInfo *TII,
|
||||
MachineInstr::MIFlag = MachineInstr::NoFlags,
|
||||
bool SetCPSR = false);
|
||||
|
||||
/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the
|
||||
/// FP. Return false if the offset could not be handled directly in MI, and
|
||||
/// return the left-over portion by reference.
|
||||
bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int &Offset,
|
||||
const ARM64InstrInfo *TII);
|
||||
|
||||
/// \brief Used to report the frame offset status from isARM64FrameOffsetLegal.
|
||||
enum ARM64FrameOffsetStatus {
|
||||
ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
|
||||
ARM64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
|
||||
ARM64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
|
||||
};
|
||||
|
||||
/// \brief Check if the @p Offset is a valid frame offset for @p MI.
|
||||
/// The returned value reports the validity of the frame offset for @p MI.
|
||||
/// It uses the values defined by ARM64FrameOffsetStatus for that.
|
||||
/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to
|
||||
/// use an offset.
|
||||
/// If result & ARM64FrameOffsetIsLegal, @p Offset can completely be
|
||||
/// rewritten in @p MI.
|
||||
/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the
|
||||
/// amount that is off the limit of the legal offset.
|
||||
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
|
||||
/// turned into an unscaled operation, whose opcode is in @p OutUnscaledOp.
|
||||
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
|
||||
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
|
||||
/// is a legal offset.
|
||||
int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
|
||||
bool *OutUseUnscaledOp = NULL,
|
||||
unsigned *OutUnscaledOp = NULL,
|
||||
int *EmittableOffset = NULL);
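A minimal caller sketch (my assumption about intended usage, not code from this patch) showing how the bit-flag result might be interpreted; only the ARM64FrameOffset* constants come from the enum above, everything else is illustrative:

    // Classify the result of isARM64FrameOffsetLegal into an action.
    enum FrameOffsetAction { UseScratchReg, RewriteInPlace, SplitOffset };

    static FrameOffsetAction classifyFrameOffset(int Status) {
      if (Status == ARM64FrameOffsetCannotUpdate)
        return UseScratchReg;    // MI cannot take an immediate offset at all.
      if (Status & ARM64FrameOffsetIsLegal)
        return RewriteInPlace;   // The whole offset fits directly in MI.
      return SplitOffset;        // Only part fits; the remainder stays in Offset.
    }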
|
||||
|
||||
static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; }
|
||||
|
||||
static inline bool isCondBranchOpcode(int Opc) {
|
||||
switch (Opc) {
|
||||
case ARM64::Bcc:
|
||||
case ARM64::CBZW:
|
||||
case ARM64::CBZX:
|
||||
case ARM64::CBNZW:
|
||||
case ARM64::CBNZX:
|
||||
case ARM64::TBZ:
|
||||
case ARM64::TBNZ:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; }
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
4394	lib/Target/ARM64/ARM64InstrInfo.td	Normal file	(file diff suppressed because it is too large)
950	lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp	Normal file
@ -0,0 +1,950 @@
|
||||
//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. pass --*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a pass that performs load / store related peephole
|
||||
// optimizations. This pass should be run after register allocation.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-ldst-opt"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "MCTargetDesc/ARM64AddressingModes.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
using namespace llvm;
|
||||
|
||||
/// ARM64LoadStoreOpt - Post-register allocation pass to combine
|
||||
/// load / store instructions to form ldp / stp instructions.
|
||||
|
||||
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
|
||||
STATISTIC(NumPostFolded, "Number of post-index updates folded");
|
||||
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
|
||||
STATISTIC(NumUnscaledPairCreated,
|
||||
"Number of load/store from unscaled generated");
|
||||
|
||||
static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true),
|
||||
cl::Hidden);
|
||||
static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
|
||||
cl::Hidden);
|
||||
|
||||
// Placeholder while testing unscaled load/store combining
|
||||
static cl::opt<bool>
|
||||
EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden,
|
||||
cl::desc("Allow ARM64 unscaled load/store combining"),
|
||||
cl::init(true));
|
||||
|
||||
namespace {
|
||||
struct ARM64LoadStoreOpt : public MachineFunctionPass {
|
||||
static char ID;
|
||||
ARM64LoadStoreOpt() : MachineFunctionPass(ID) {}
|
||||
|
||||
const ARM64InstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
|
||||
// Scan the instructions looking for a load/store that can be combined
|
||||
// with the current instruction into a load/store pair.
|
||||
// Return the matching instruction if one is found, else MBB->end().
|
||||
// If a matching instruction is found, mergeForward is set to true if the
|
||||
// merge is to remove the first instruction and replace the second with
|
||||
// a pair-wise insn, and false if the reverse is true.
|
||||
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
|
||||
bool &mergeForward,
|
||||
unsigned Limit);
|
||||
// Merge the two instructions indicated into a single pair-wise instruction.
|
||||
// If mergeForward is true, erase the first instruction and fold its
|
||||
// operation into the second. If false, the reverse. Return the instruction
|
||||
// following the first instruction (which may change during processing).
|
||||
MachineBasicBlock::iterator
|
||||
mergePairedInsns(MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock::iterator Paired, bool mergeForward);
|
||||
|
||||
// Scan the instruction list to find a base register update that can
|
||||
// be combined with the current instruction (a load or store) using
|
||||
// pre or post indexed addressing with writeback. Scan forwards.
|
||||
MachineBasicBlock::iterator
|
||||
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
|
||||
int Value);
|
||||
|
||||
// Scan the instruction list to find a base register update that can
|
||||
// be combined with the current instruction (a load or store) using
|
||||
// pre or post indexed addressing with writeback. Scan backwards.
|
||||
MachineBasicBlock::iterator
|
||||
findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
|
||||
|
||||
// Merge a pre-index base register update into a ld/st instruction.
|
||||
MachineBasicBlock::iterator
|
||||
mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock::iterator Update);
|
||||
|
||||
// Merge a post-index base register update into a ld/st instruction.
|
||||
MachineBasicBlock::iterator
|
||||
mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock::iterator Update);
|
||||
|
||||
bool optimizeBlock(MachineBasicBlock &MBB);
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &Fn);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM64 load / store optimization pass";
|
||||
}
|
||||
|
||||
private:
|
||||
int getMemSize(MachineInstr *MemMI);
|
||||
};
|
||||
char ARM64LoadStoreOpt::ID = 0;
|
||||
}
|
||||
|
||||
static bool isUnscaledLdst(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
return false;
|
||||
case ARM64::STURSi:
|
||||
return true;
|
||||
case ARM64::STURDi:
|
||||
return true;
|
||||
case ARM64::STURQi:
|
||||
return true;
|
||||
case ARM64::STURWi:
|
||||
return true;
|
||||
case ARM64::STURXi:
|
||||
return true;
|
||||
case ARM64::LDURSi:
|
||||
return true;
|
||||
case ARM64::LDURDi:
|
||||
return true;
|
||||
case ARM64::LDURQi:
|
||||
return true;
|
||||
case ARM64::LDURWi:
|
||||
return true;
|
||||
case ARM64::LDURXi:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Size in bytes of the data moved by a scaled or unscaled load or store.
|
||||
int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
|
||||
switch (MemMI->getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Opcode has has unknown size!");
|
||||
case ARM64::STRSui:
|
||||
case ARM64::STURSi:
|
||||
return 4;
|
||||
case ARM64::STRDui:
|
||||
case ARM64::STURDi:
|
||||
return 8;
|
||||
case ARM64::STRQui:
|
||||
case ARM64::STURQi:
|
||||
return 16;
|
||||
case ARM64::STRWui:
|
||||
case ARM64::STURWi:
|
||||
return 4;
|
||||
case ARM64::STRXui:
|
||||
case ARM64::STURXi:
|
||||
return 8;
|
||||
case ARM64::LDRSui:
|
||||
case ARM64::LDURSi:
|
||||
return 4;
|
||||
case ARM64::LDRDui:
|
||||
case ARM64::LDURDi:
|
||||
return 8;
|
||||
case ARM64::LDRQui:
|
||||
case ARM64::LDURQi:
|
||||
return 16;
|
||||
case ARM64::LDRWui:
|
||||
case ARM64::LDURWi:
|
||||
return 4;
|
||||
case ARM64::LDRXui:
|
||||
case ARM64::LDURXi:
|
||||
return 8;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getMatchingPairOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
llvm_unreachable("Opcode has no pairwise equivalent!");
|
||||
case ARM64::STRSui:
|
||||
case ARM64::STURSi:
|
||||
return ARM64::STPSi;
|
||||
case ARM64::STRDui:
|
||||
case ARM64::STURDi:
|
||||
return ARM64::STPDi;
|
||||
case ARM64::STRQui:
|
||||
case ARM64::STURQi:
|
||||
return ARM64::STPQi;
|
||||
case ARM64::STRWui:
|
||||
case ARM64::STURWi:
|
||||
return ARM64::STPWi;
|
||||
case ARM64::STRXui:
|
||||
case ARM64::STURXi:
|
||||
return ARM64::STPXi;
|
||||
case ARM64::LDRSui:
|
||||
case ARM64::LDURSi:
|
||||
return ARM64::LDPSi;
|
||||
case ARM64::LDRDui:
|
||||
case ARM64::LDURDi:
|
||||
return ARM64::LDPDi;
|
||||
case ARM64::LDRQui:
|
||||
case ARM64::LDURQi:
|
||||
return ARM64::LDPQi;
|
||||
case ARM64::LDRWui:
|
||||
case ARM64::LDURWi:
|
||||
return ARM64::LDPWi;
|
||||
case ARM64::LDRXui:
|
||||
case ARM64::LDURXi:
|
||||
return ARM64::LDPXi;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getPreIndexedOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
llvm_unreachable("Opcode has no pre-indexed equivalent!");
|
||||
case ARM64::STRSui: return ARM64::STRSpre;
|
||||
case ARM64::STRDui: return ARM64::STRDpre;
|
||||
case ARM64::STRQui: return ARM64::STRQpre;
|
||||
case ARM64::STRWui: return ARM64::STRWpre;
|
||||
case ARM64::STRXui: return ARM64::STRXpre;
|
||||
case ARM64::LDRSui: return ARM64::LDRSpre;
|
||||
case ARM64::LDRDui: return ARM64::LDRDpre;
|
||||
case ARM64::LDRQui: return ARM64::LDRQpre;
|
||||
case ARM64::LDRWui: return ARM64::LDRWpre;
|
||||
case ARM64::LDRXui: return ARM64::LDRXpre;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getPostIndexedOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
llvm_unreachable("Opcode has no post-indexed wise equivalent!");
|
||||
case ARM64::STRSui:
|
||||
return ARM64::STRSpost;
|
||||
case ARM64::STRDui:
|
||||
return ARM64::STRDpost;
|
||||
case ARM64::STRQui:
|
||||
return ARM64::STRQpost;
|
||||
case ARM64::STRWui:
|
||||
return ARM64::STRWpost;
|
||||
case ARM64::STRXui:
|
||||
return ARM64::STRXpost;
|
||||
case ARM64::LDRSui:
|
||||
return ARM64::LDRSpost;
|
||||
case ARM64::LDRDui:
|
||||
return ARM64::LDRDpost;
|
||||
case ARM64::LDRQui:
|
||||
return ARM64::LDRQpost;
|
||||
case ARM64::LDRWui:
|
||||
return ARM64::LDRWpost;
|
||||
case ARM64::LDRXui:
|
||||
return ARM64::LDRXpost;
|
||||
}
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock::iterator Paired,
|
||||
bool mergeForward) {
|
||||
MachineBasicBlock::iterator NextI = I;
|
||||
++NextI;
|
||||
// If NextI is the second of the two instructions to be merged, we need
|
||||
// to skip one further. Either way we merge will invalidate the iterator,
|
||||
// and we don't need to scan the new instruction, as it's a pairwise
|
||||
// instruction, which we're not considering for further action anyway.
|
||||
if (NextI == Paired)
|
||||
++NextI;
|
||||
|
||||
bool IsUnscaled = isUnscaledLdst(I->getOpcode());
|
||||
int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1;
|
||||
|
||||
unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
|
||||
// Insert our new paired instruction after whichever of the paired
|
||||
// instructions mergeForward indicates.
|
||||
MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I;
|
||||
// mergeForward also determines which instruction the base register operand is
|
||||
// copied from, so that the flags stay compatible with the input code.
|
||||
MachineOperand &BaseRegOp =
|
||||
mergeForward ? Paired->getOperand(1) : I->getOperand(1);
|
||||
|
||||
// Which register is Rt and which is Rt2 depends on the offset order.
|
||||
MachineInstr *RtMI, *Rt2MI;
|
||||
if (I->getOperand(2).getImm() ==
|
||||
Paired->getOperand(2).getImm() + OffsetStride) {
|
||||
RtMI = Paired;
|
||||
Rt2MI = I;
|
||||
} else {
|
||||
RtMI = I;
|
||||
Rt2MI = Paired;
|
||||
}
|
||||
// Handle Unscaled
|
||||
int OffsetImm = RtMI->getOperand(2).getImm();
|
||||
if (IsUnscaled && EnableARM64UnscaledMemOp)
|
||||
OffsetImm /= OffsetStride;
|
||||
|
||||
// Construct the new instruction.
|
||||
MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
|
||||
I->getDebugLoc(), TII->get(NewOpc))
|
||||
.addOperand(RtMI->getOperand(0))
|
||||
.addOperand(Rt2MI->getOperand(0))
|
||||
.addOperand(BaseRegOp)
|
||||
.addImm(OffsetImm);
|
||||
(void)MIB;
|
||||
|
||||
// FIXME: Do we need/want to copy the mem operands from the source
|
||||
// instructions? Probably. What uses them after this?
|
||||
|
||||
DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
|
||||
DEBUG(I->print(dbgs()));
|
||||
DEBUG(dbgs() << " ");
|
||||
DEBUG(Paired->print(dbgs()));
|
||||
DEBUG(dbgs() << " with instruction:\n ");
|
||||
DEBUG(((MachineInstr *)MIB)->print(dbgs()));
|
||||
DEBUG(dbgs() << "\n");
|
||||
|
||||
// Erase the old instructions.
|
||||
I->eraseFromParent();
|
||||
Paired->eraseFromParent();
|
||||
|
||||
return NextI;
|
||||
}
|
||||
|
||||
/// trackRegDefsUses - Remember what registers the specified instruction uses
|
||||
/// and modifies.
|
||||
static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
|
||||
BitVector &UsedRegs,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = MI->getOperand(i);
|
||||
if (MO.isRegMask())
|
||||
ModifiedRegs.setBitsNotInMask(MO.getRegMask());
|
||||
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
if (MO.isDef()) {
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
ModifiedRegs.set(*AI);
|
||||
} else {
|
||||
assert(MO.isUse() && "Reg operand not a def and not a use?!?");
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
UsedRegs.set(*AI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
|
||||
if (!IsUnscaled && (Offset > 63 || Offset < -64))
|
||||
return false;
|
||||
if (IsUnscaled) {
|
||||
// Convert the byte-offset used by unscaled into an "element" offset used
|
||||
// by the scaled pair load/store instructions.
|
||||
int elemOffset = Offset / OffsetStride;
|
||||
if (elemOffset > 63 || elemOffset < -64)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Do alignment, specialized to power of 2 and for signed ints,
|
||||
// avoiding having to do a C-style cast from uint64_t to int when
|
||||
// using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
|
||||
// FIXME: Move this function to include/MathExtras.h?
|
||||
static int alignTo(int Num, int PowOf2) {
|
||||
return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
|
||||
}
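A quick worked example of the rounding behaviour (illustrative only; not part of the pass):

    #include <cassert>

    static void alignToExamples() {
      assert(alignTo(0, 8)  == 0);
      assert(alignTo(1, 8)  == 8);
      assert(alignTo(13, 8) == 16);
      assert(alignTo(16, 8) == 16);  // Already-aligned values are unchanged.
    }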
|
||||
|
||||
/// findMatchingInsn - Scan the instructions looking for a load/store that can
|
||||
/// be combined with the current instruction into a load/store pair.
|
||||
MachineBasicBlock::iterator
|
||||
ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
|
||||
bool &mergeForward, unsigned Limit) {
|
||||
MachineBasicBlock::iterator E = I->getParent()->end();
|
||||
MachineBasicBlock::iterator MBBI = I;
|
||||
MachineInstr *FirstMI = I;
|
||||
++MBBI;
|
||||
|
||||
int Opc = FirstMI->getOpcode();
|
||||
bool mayLoad = FirstMI->mayLoad();
|
||||
bool IsUnscaled = isUnscaledLdst(Opc);
|
||||
unsigned Reg = FirstMI->getOperand(0).getReg();
|
||||
unsigned BaseReg = FirstMI->getOperand(1).getReg();
|
||||
int Offset = FirstMI->getOperand(2).getImm();
|
||||
|
||||
// Early exit if the first instruction modifies the base register.
|
||||
// e.g., ldr x0, [x0]
|
||||
// Early exit if the offset is not possible to match. (6 bits of positive
|
||||
// range, plus allow an extra one in case we find a later insn that matches
|
||||
// with Offset-1.)
|
||||
if (FirstMI->modifiesRegister(BaseReg, TRI))
|
||||
return E;
|
||||
int OffsetStride =
|
||||
IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1;
|
||||
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
|
||||
return E;
|
||||
|
||||
// Track which registers have been modified and used between the first insn
|
||||
// (inclusive) and the second insn.
|
||||
BitVector ModifiedRegs, UsedRegs;
|
||||
ModifiedRegs.resize(TRI->getNumRegs());
|
||||
UsedRegs.resize(TRI->getNumRegs());
|
||||
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
|
||||
MachineInstr *MI = MBBI;
|
||||
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
|
||||
// optimization by changing how far we scan.
|
||||
if (MI->isDebugValue())
|
||||
continue;
|
||||
|
||||
// Now that we know this is a real instruction, count it.
|
||||
++Count;
|
||||
|
||||
if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
|
||||
// If we've found another instruction with the same opcode, check to see
|
||||
// if the base and offset are compatible with our starting instruction.
|
||||
// These instructions all have scaled immediate operands, so we just
|
||||
// check for +1/-1. Make sure to check the new instruction offset is
|
||||
// actually an immediate and not a symbolic reference destined for
|
||||
// a relocation.
|
||||
//
|
||||
// Pairwise instructions have a 7-bit signed offset field. Single insns
|
||||
// have a 12-bit unsigned offset field. To be a valid combine, the
|
||||
// final offset must be in range.
|
||||
unsigned MIBaseReg = MI->getOperand(1).getReg();
|
||||
int MIOffset = MI->getOperand(2).getImm();
|
||||
if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
|
||||
(Offset + OffsetStride == MIOffset))) {
|
||||
int MinOffset = Offset < MIOffset ? Offset : MIOffset;
|
||||
// If this is a volatile load/store that otherwise matched, stop looking
|
||||
// as something is going on that we don't have enough information to
|
||||
// safely transform. Similarly, stop if we see a hint to avoid pairs.
|
||||
if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
|
||||
return E;
|
||||
// If the resultant immediate offset of merging these instructions
|
||||
// is out of range for a pairwise instruction, bail and keep looking.
|
||||
bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
|
||||
if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
|
||||
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
|
||||
continue;
|
||||
}
|
||||
// If the alignment requirements of the paired (scaled) instruction
|
||||
// can't express the offset of the unscaled input, bail and keep
|
||||
// looking.
|
||||
if (IsUnscaled && EnableARM64UnscaledMemOp &&
|
||||
(alignTo(MinOffset, OffsetStride) != MinOffset)) {
|
||||
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
|
||||
continue;
|
||||
}
|
||||
// If the destination register of the loads is the same register, bail
|
||||
// and keep looking. A load-pair instruction with both destination
|
||||
// registers the same is UNPREDICTABLE and will result in an exception.
|
||||
if (mayLoad && Reg == MI->getOperand(0).getReg()) {
|
||||
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the Rt of the second instruction was not modified or used between
|
||||
// the two instructions, we can combine the second into the first.
|
||||
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
|
||||
!UsedRegs[MI->getOperand(0).getReg()]) {
|
||||
mergeForward = false;
|
||||
return MBBI;
|
||||
}
|
||||
|
||||
// Likewise, if the Rt of the first instruction is not modified or used
|
||||
// between the two instructions, we can combine the first into the
|
||||
// second.
|
||||
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
|
||||
!UsedRegs[FirstMI->getOperand(0).getReg()]) {
|
||||
mergeForward = true;
|
||||
return MBBI;
|
||||
}
|
||||
// Unable to combine these instructions due to interference in between.
|
||||
// Keep looking.
|
||||
}
|
||||
}
|
||||
|
||||
// If the instruction wasn't a matching load or store, but does (or can)
|
||||
// modify memory, stop searching, as we don't have alias analysis or
|
||||
// anything like that to tell us whether the access is tromping on the
|
||||
// locations we care about. The big one we want to catch is calls.
|
||||
//
|
||||
// FIXME: Theoretically, we can do better than that for SP and FP based
|
||||
// references since we can effectively know where those are touching. It's
|
||||
// unclear if it's worth the extra code, though. Most paired instructions
|
||||
// will be sequential, perhaps with a few intervening non-memory related
|
||||
// instructions.
|
||||
if (MI->mayStore() || MI->isCall())
|
||||
return E;
|
||||
// Likewise, if we're matching a store instruction, we don't want to
|
||||
// move across a load, as it may be reading the same location.
|
||||
if (FirstMI->mayStore() && MI->mayLoad())
|
||||
return E;
|
||||
|
||||
// Update modified / used register lists.
|
||||
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
|
||||
|
||||
// Otherwise, if the base register is modified, we have no match, so
|
||||
// return early.
|
||||
if (ModifiedRegs[BaseReg])
|
||||
return E;
|
||||
}
|
||||
return E;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock::iterator Update) {
|
||||
assert((Update->getOpcode() == ARM64::ADDXri ||
|
||||
Update->getOpcode() == ARM64::SUBXri) &&
|
||||
"Unexpected base register update instruction to merge!");
|
||||
MachineBasicBlock::iterator NextI = I;
|
||||
// Return the instruction following the merged instruction, which is
|
||||
// the instruction following our unmerged load. Unless that's the add/sub
|
||||
// instruction we're merging, in which case it's the one after that.
|
||||
if (++NextI == Update)
|
||||
++NextI;
|
||||
|
||||
int Value = Update->getOperand(2).getImm();
|
||||
assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
|
||||
"Can't merge 1 << 12 offset into pre-indexed load / store");
|
||||
if (Update->getOpcode() == ARM64::SUBXri)
|
||||
Value = -Value;
|
||||
|
||||
unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
|
||||
.addOperand(I->getOperand(0))
|
||||
.addOperand(I->getOperand(1))
|
||||
.addImm(Value);
|
||||
(void)MIB;
|
||||
|
||||
DEBUG(dbgs() << "Creating pre-indexed load/store.");
|
||||
DEBUG(dbgs() << " Replacing instructions:\n ");
|
||||
DEBUG(I->print(dbgs()));
|
||||
DEBUG(dbgs() << " ");
|
||||
DEBUG(Update->print(dbgs()));
|
||||
DEBUG(dbgs() << " with instruction:\n ");
|
||||
DEBUG(((MachineInstr *)MIB)->print(dbgs()));
|
||||
DEBUG(dbgs() << "\n");
|
||||
|
||||
// Erase the old instructions for the block.
|
||||
I->eraseFromParent();
|
||||
Update->eraseFromParent();
|
||||
|
||||
return NextI;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock::iterator Update) {
|
||||
assert((Update->getOpcode() == ARM64::ADDXri ||
|
||||
Update->getOpcode() == ARM64::SUBXri) &&
|
||||
"Unexpected base register update instruction to merge!");
|
||||
MachineBasicBlock::iterator NextI = I;
|
||||
// Return the instruction following the merged instruction, which is
|
||||
// the instruction following our unmerged load. Unless that's the add/sub
|
||||
// instruction we're merging, in which case it's the one after that.
|
||||
if (++NextI == Update)
|
||||
++NextI;
|
||||
|
||||
int Value = Update->getOperand(2).getImm();
|
||||
assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
|
||||
"Can't merge 1 << 12 offset into post-indexed load / store");
|
||||
if (Update->getOpcode() == ARM64::SUBXri)
|
||||
Value = -Value;
|
||||
|
||||
unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
|
||||
.addOperand(I->getOperand(0))
|
||||
.addOperand(I->getOperand(1))
|
||||
.addImm(Value);
|
||||
(void)MIB;
|
||||
|
||||
DEBUG(dbgs() << "Creating post-indexed load/store.");
|
||||
DEBUG(dbgs() << " Replacing instructions:\n ");
|
||||
DEBUG(I->print(dbgs()));
|
||||
DEBUG(dbgs() << " ");
|
||||
DEBUG(Update->print(dbgs()));
|
||||
DEBUG(dbgs() << " with instruction:\n ");
|
||||
DEBUG(((MachineInstr *)MIB)->print(dbgs()));
|
||||
DEBUG(dbgs() << "\n");
|
||||
|
||||
// Erase the old instructions for the block.
|
||||
I->eraseFromParent();
|
||||
Update->eraseFromParent();
|
||||
|
||||
return NextI;
|
||||
}
|
||||
|
||||
static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
|
||||
int Offset) {
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
case ARM64::SUBXri:
|
||||
// Negate the offset for a SUB instruction.
|
||||
Offset *= -1;
|
||||
// FALLTHROUGH
|
||||
case ARM64::ADDXri:
|
||||
// Make sure it's a vanilla immediate operand, not a relocation or
|
||||
// anything else we can't handle.
|
||||
if (!MI->getOperand(2).isImm())
|
||||
break;
|
||||
// Watch out for 1 << 12 shifted value.
|
||||
if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm()))
|
||||
break;
|
||||
// If the instruction has the base register as source and dest and the
|
||||
// immediate will fit in a signed 9-bit integer, then we have a match.
|
||||
if (MI->getOperand(0).getReg() == BaseReg &&
|
||||
MI->getOperand(1).getReg() == BaseReg &&
|
||||
MI->getOperand(2).getImm() <= 255 &&
|
||||
MI->getOperand(2).getImm() >= -256) {
|
||||
// If we have a non-zero Offset, we check that it matches the amount
|
||||
// we're adding to the register.
|
||||
if (!Offset || Offset == MI->getOperand(2).getImm())
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
|
||||
unsigned Limit, int Value) {
|
||||
MachineBasicBlock::iterator E = I->getParent()->end();
|
||||
MachineInstr *MemMI = I;
|
||||
MachineBasicBlock::iterator MBBI = I;
|
||||
const MachineFunction &MF = *MemMI->getParent()->getParent();
|
||||
|
||||
unsigned DestReg = MemMI->getOperand(0).getReg();
|
||||
unsigned BaseReg = MemMI->getOperand(1).getReg();
|
||||
int Offset = MemMI->getOperand(2).getImm() *
|
||||
TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
|
||||
|
||||
// If the base register overlaps the destination register, we can't
|
||||
// merge the update.
|
||||
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
|
||||
return E;
|
||||
|
||||
// Scan forward looking for post-index opportunities.
|
||||
// Updating instructions can't be formed if the memory insn already
|
||||
// has an offset other than the value we're looking for.
|
||||
if (Offset != Value)
|
||||
return E;
|
||||
|
||||
// Track which registers have been modified and used between the first insn
|
||||
// (inclusive) and the second insn.
|
||||
BitVector ModifiedRegs, UsedRegs;
|
||||
ModifiedRegs.resize(TRI->getNumRegs());
|
||||
UsedRegs.resize(TRI->getNumRegs());
|
||||
++MBBI;
|
||||
for (unsigned Count = 0; MBBI != E; ++MBBI) {
|
||||
MachineInstr *MI = MBBI;
|
||||
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
|
||||
// optimization by changing how far we scan.
|
||||
if (MI->isDebugValue())
|
||||
continue;
|
||||
|
||||
// Now that we know this is a real instruction, count it.
|
||||
++Count;
|
||||
|
||||
// If we found a match, return it.
|
||||
if (isMatchingUpdateInsn(MI, BaseReg, Value))
|
||||
return MBBI;
|
||||
|
||||
// Update the status of what the instruction clobbered and used.
|
||||
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
|
||||
|
||||
// Otherwise, if the base register is used or modified, we have no match, so
|
||||
// return early.
|
||||
if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
|
||||
return E;
|
||||
}
|
||||
return E;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator
|
||||
ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I,
|
||||
unsigned Limit) {
|
||||
MachineBasicBlock::iterator B = I->getParent()->begin();
|
||||
MachineBasicBlock::iterator E = I->getParent()->end();
|
||||
MachineInstr *MemMI = I;
|
||||
MachineBasicBlock::iterator MBBI = I;
|
||||
const MachineFunction &MF = *MemMI->getParent()->getParent();
|
||||
|
||||
unsigned DestReg = MemMI->getOperand(0).getReg();
|
||||
unsigned BaseReg = MemMI->getOperand(1).getReg();
|
||||
int Offset = MemMI->getOperand(2).getImm();
|
||||
unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
|
||||
|
||||
// If the load/store is the first instruction in the block, there's obviously
|
||||
// not any matching update. Ditto if the memory offset isn't zero.
|
||||
if (MBBI == B || Offset != 0)
|
||||
return E;
|
||||
// If the base register overlaps the destination register, we can't
|
||||
// merge the update.
|
||||
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
|
||||
return E;
|
||||
|
||||
// Track which registers have been modified and used between the first insn
|
||||
// (inclusive) and the second insn.
|
||||
BitVector ModifiedRegs, UsedRegs;
|
||||
ModifiedRegs.resize(TRI->getNumRegs());
|
||||
UsedRegs.resize(TRI->getNumRegs());
|
||||
--MBBI;
|
||||
for (unsigned Count = 0; MBBI != B; --MBBI) {
|
||||
MachineInstr *MI = MBBI;
|
||||
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
|
||||
// optimization by changing how far we scan.
|
||||
if (MI->isDebugValue())
|
||||
continue;
|
||||
|
||||
// Now that we know this is a real instruction, count it.
|
||||
++Count;
|
||||
|
||||
// If we found a match, return it.
|
||||
if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
|
||||
return MBBI;
|
||||
|
||||
// Update the status of what the instruction clobbered and used.
|
||||
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
|
||||
|
||||
// Otherwise, if the base register is used or modified, we have no match, so
|
||||
// return early.
|
||||
if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
|
||||
return E;
|
||||
}
|
||||
return E;
|
||||
}
|
||||
|
||||
bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
|
||||
bool Modified = false;
|
||||
// Two transformations to do here:
|
||||
// 1) Find loads and stores that can be merged into a single load or store
|
||||
// pair instruction.
|
||||
// e.g.,
|
||||
// ldr x0, [x2]
|
||||
// ldr x1, [x2, #8]
|
||||
// ; becomes
|
||||
// ldp x0, x1, [x2]
|
||||
// 2) Find base register updates that can be merged into the load or store
|
||||
// as a base-reg writeback.
|
||||
// e.g.,
|
||||
// ldr x0, [x2]
|
||||
// add x2, x2, #4
|
||||
// ; becomes
|
||||
// ldr x0, [x2], #4
|
||||
|
||||
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
|
||||
MBBI != E;) {
|
||||
MachineInstr *MI = MBBI;
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
// Just move on to the next instruction.
|
||||
++MBBI;
|
||||
break;
|
||||
case ARM64::STRSui:
|
||||
case ARM64::STRDui:
|
||||
case ARM64::STRQui:
|
||||
case ARM64::STRXui:
|
||||
case ARM64::STRWui:
|
||||
case ARM64::LDRSui:
|
||||
case ARM64::LDRDui:
|
||||
case ARM64::LDRQui:
|
||||
case ARM64::LDRXui:
|
||||
case ARM64::LDRWui:
|
||||
// do the unscaled versions as well
|
||||
case ARM64::STURSi:
|
||||
case ARM64::STURDi:
|
||||
case ARM64::STURQi:
|
||||
case ARM64::STURWi:
|
||||
case ARM64::STURXi:
|
||||
case ARM64::LDURSi:
|
||||
case ARM64::LDURDi:
|
||||
case ARM64::LDURQi:
|
||||
case ARM64::LDURWi:
|
||||
case ARM64::LDURXi: {
|
||||
// If this is a volatile load/store, don't mess with it.
|
||||
if (MI->hasOrderedMemoryRef()) {
|
||||
++MBBI;
|
||||
break;
|
||||
}
|
||||
// Make sure this is a reg+imm (as opposed to an address reloc).
|
||||
if (!MI->getOperand(2).isImm()) {
|
||||
++MBBI;
|
||||
break;
|
||||
}
|
||||
// Check if this load/store has a hint to avoid pair formation.
|
||||
// MachineMemOperands hints are set by the ARM64StorePairSuppress pass.
|
||||
if (TII->isLdStPairSuppressed(MI)) {
|
||||
++MBBI;
|
||||
break;
|
||||
}
|
||||
// Look ahead up to ScanLimit instructions for a pairable instruction.
|
||||
bool mergeForward = false;
|
||||
MachineBasicBlock::iterator Paired =
|
||||
findMatchingInsn(MBBI, mergeForward, ScanLimit);
|
||||
if (Paired != E) {
|
||||
// Merge the loads into a pair. Keeping the iterator straight is a
|
||||
// pain, so we let the merge routine tell us what the next instruction
|
||||
// is after it's done mucking about.
|
||||
MBBI = mergePairedInsns(MBBI, Paired, mergeForward);
|
||||
|
||||
Modified = true;
|
||||
++NumPairCreated;
|
||||
if (isUnscaledLdst(MI->getOpcode()))
|
||||
++NumUnscaledPairCreated;
|
||||
break;
|
||||
}
|
||||
++MBBI;
|
||||
break;
|
||||
}
|
||||
// FIXME: Do the other instructions.
|
||||
}
|
||||
}
|
||||
|
||||
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
|
||||
MBBI != E;) {
|
||||
MachineInstr *MI = MBBI;
|
||||
// Do update merging. It's simpler to keep this separate from the above
|
||||
// switch, though not strictly necessary.
|
||||
int Opc = MI->getOpcode();
|
||||
switch (Opc) {
|
||||
default:
|
||||
// Just move on to the next instruction.
|
||||
++MBBI;
|
||||
break;
|
||||
case ARM64::STRSui:
|
||||
case ARM64::STRDui:
|
||||
case ARM64::STRQui:
|
||||
case ARM64::STRXui:
|
||||
case ARM64::STRWui:
|
||||
case ARM64::LDRSui:
|
||||
case ARM64::LDRDui:
|
||||
case ARM64::LDRQui:
|
||||
case ARM64::LDRXui:
|
||||
case ARM64::LDRWui:
|
||||
// do the unscaled versions as well
|
||||
case ARM64::STURSi:
|
||||
case ARM64::STURDi:
|
||||
case ARM64::STURQi:
|
||||
case ARM64::STURWi:
|
||||
case ARM64::STURXi:
|
||||
case ARM64::LDURSi:
|
||||
case ARM64::LDURDi:
|
||||
case ARM64::LDURQi:
|
||||
case ARM64::LDURWi:
|
||||
case ARM64::LDURXi: {
|
||||
// Make sure this is a reg+imm (as opposed to an address reloc).
|
||||
if (!MI->getOperand(2).isImm()) {
|
||||
++MBBI;
|
||||
break;
|
||||
}
|
||||
// Look ahead up to ScanLimit instructions for a mergable instruction.
|
||||
MachineBasicBlock::iterator Update =
|
||||
findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
|
||||
if (Update != E) {
|
||||
// Merge the update into the ld/st.
|
||||
MBBI = mergePostIdxUpdateInsn(MBBI, Update);
|
||||
Modified = true;
|
||||
++NumPostFolded;
|
||||
break;
|
||||
}
|
||||
// Don't know how to handle pre/post-index versions, so move to the next
|
||||
// instruction.
|
||||
if (isUnscaledLdst(Opc)) {
|
||||
++MBBI;
|
||||
break;
|
||||
}
|
||||
|
||||
// Look back to try to find a pre-index instruction. For example,
|
||||
// add x0, x0, #8
|
||||
// ldr x1, [x0]
|
||||
// merged into:
|
||||
// ldr x1, [x0, #8]!
|
||||
Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
|
||||
if (Update != E) {
|
||||
// Merge the update into the ld/st.
|
||||
MBBI = mergePreIdxUpdateInsn(MBBI, Update);
|
||||
Modified = true;
|
||||
++NumPreFolded;
|
||||
break;
|
||||
}
|
||||
|
||||
// Look forward to try to find a post-index instruction. For example,
|
||||
// ldr x1, [x0, #64]
|
||||
// add x0, x0, #64
|
||||
// merged into:
|
||||
// ldr x1, [x0], #64
|
||||
|
||||
// The immediate in the load/store is scaled by the size of the register
|
||||
// being loaded. The immediate in the add we're looking for,
|
||||
// however, is not, so adjust here.
|
||||
int Value = MI->getOperand(2).getImm() *
|
||||
TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
|
||||
->getSize();
|
||||
Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
|
||||
if (Update != E) {
|
||||
// Merge the update into the ld/st.
|
||||
MBBI = mergePreIdxUpdateInsn(MBBI, Update);
|
||||
Modified = true;
|
||||
++NumPreFolded;
|
||||
break;
|
||||
}
|
||||
|
||||
// Nothing found. Just move to the next instruction.
|
||||
++MBBI;
|
||||
break;
|
||||
}
|
||||
// FIXME: Do the other instructions.
|
||||
}
|
||||
}
|
||||
|
||||
return Modified;
|
||||
}
|
||||
|
||||
bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
||||
// Early exit if pass disabled.
|
||||
if (!DoLoadStoreOpt)
|
||||
return false;
|
||||
|
||||
const TargetMachine &TM = Fn.getTarget();
|
||||
TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
|
||||
TRI = TM.getRegisterInfo();
|
||||
|
||||
bool Modified = false;
|
||||
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
||||
++MFI) {
|
||||
MachineBasicBlock &MBB = *MFI;
|
||||
Modified |= optimizeBlock(MBB);
|
||||
}
|
||||
|
||||
return Modified;
|
||||
}
|
||||
|
||||
// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
|
||||
// loads and stores near one another?
|
||||
|
||||
/// createARM64LoadStoreOptimizationPass - returns an instance of the load / store
|
||||
/// optimization pass.
|
||||
FunctionPass *llvm::createARM64LoadStoreOptimizationPass() {
|
||||
return new ARM64LoadStoreOpt();
|
||||
}
|
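For orientation, here is a minimal sketch of how a pass like the one above is typically scheduled by the target. The ARM64PassConfig hook shown (addPreSched2) is an assumption for illustration only; the real wiring lives in ARM64TargetMachine.cpp and is not part of the hunk above.

// Hypothetical sketch (not part of this diff): schedule the load/store
// optimizer after register allocation so pairing sees physical registers.
bool ARM64PassConfig::addPreSched2() {            // hook name is an assumption
  addPass(createARM64LoadStoreOptimizationPass());
  return true;
}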
201
lib/Target/ARM64/ARM64MCInstLower.cpp
Normal file
@ -0,0 +1,201 @@
|
||||
//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst---===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains code to lower ARM64 MachineInstrs to their corresponding
|
||||
// MCInst records.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM64MCInstLower.h"
|
||||
#include "MCTargetDesc/ARM64BaseInfo.h"
|
||||
#include "MCTargetDesc/ARM64MCExpr.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/IR/Mangler.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/Support/CodeGen.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
using namespace llvm;
|
||||
|
||||
ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang,
|
||||
AsmPrinter &printer)
|
||||
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
|
||||
|
||||
MCSymbol *
|
||||
ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
|
||||
return Printer.getSymbol(MO.getGlobal());
|
||||
}
|
||||
|
||||
MCSymbol *
|
||||
ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
|
||||
return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
|
||||
}
|
||||
|
||||
MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
|
||||
MCSymbol *Sym) const {
|
||||
// FIXME: We would like an efficient form for this, so we don't have to do a
|
||||
// lot of extra uniquing.
|
||||
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
|
||||
if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) {
|
||||
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
|
||||
RefKind = MCSymbolRefExpr::VK_GOTPAGE;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
|
||||
ARM64II::MO_PAGEOFF)
|
||||
RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
|
||||
else
|
||||
assert(0 && "Unexpected target flags with MO_GOT on GV operand");
|
||||
} else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) {
|
||||
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
|
||||
RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
|
||||
ARM64II::MO_PAGEOFF)
|
||||
RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF;
|
||||
else
|
||||
llvm_unreachable("Unexpected target flags with MO_TLS on GV operand");
|
||||
} else {
|
||||
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
|
||||
RefKind = MCSymbolRefExpr::VK_PAGE;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
|
||||
ARM64II::MO_PAGEOFF)
|
||||
RefKind = MCSymbolRefExpr::VK_PAGEOFF;
|
||||
}
|
||||
const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
|
||||
if (!MO.isJTI() && MO.getOffset())
|
||||
Expr = MCBinaryExpr::CreateAdd(
|
||||
Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
|
||||
return MCOperand::CreateExpr(Expr);
|
||||
}
|
||||
|
||||
MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
|
||||
MCSymbol *Sym) const {
|
||||
uint32_t RefFlags = 0;
|
||||
|
||||
if (MO.getTargetFlags() & ARM64II::MO_GOT)
|
||||
RefFlags |= ARM64MCExpr::VK_GOT;
|
||||
else if (MO.getTargetFlags() & ARM64II::MO_TLS) {
|
||||
TLSModel::Model Model;
|
||||
if (MO.isGlobal()) {
|
||||
const GlobalValue *GV = MO.getGlobal();
|
||||
Model = Printer.TM.getTLSModel(GV);
|
||||
} else {
|
||||
assert(MO.isSymbol() &&
|
||||
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
|
||||
"unexpected external TLS symbol");
|
||||
Model = TLSModel::GeneralDynamic;
|
||||
}
|
||||
switch (Model) {
|
||||
case TLSModel::InitialExec:
|
||||
RefFlags |= ARM64MCExpr::VK_GOTTPREL;
|
||||
break;
|
||||
case TLSModel::LocalExec:
|
||||
RefFlags |= ARM64MCExpr::VK_TPREL;
|
||||
break;
|
||||
case TLSModel::LocalDynamic:
|
||||
RefFlags |= ARM64MCExpr::VK_DTPREL;
|
||||
break;
|
||||
case TLSModel::GeneralDynamic:
|
||||
RefFlags |= ARM64MCExpr::VK_TLSDESC;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// No modifier means this is a generic reference, classified as absolute for
|
||||
// the cases where it matters (:abs_g0: etc).
|
||||
RefFlags |= ARM64MCExpr::VK_ABS;
|
||||
}
|
||||
|
||||
if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
|
||||
RefFlags |= ARM64MCExpr::VK_PAGE;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF)
|
||||
RefFlags |= ARM64MCExpr::VK_PAGEOFF;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3)
|
||||
RefFlags |= ARM64MCExpr::VK_G3;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2)
|
||||
RefFlags |= ARM64MCExpr::VK_G2;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1)
|
||||
RefFlags |= ARM64MCExpr::VK_G1;
|
||||
else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0)
|
||||
RefFlags |= ARM64MCExpr::VK_G0;
|
||||
|
||||
if (MO.getTargetFlags() & ARM64II::MO_NC)
|
||||
RefFlags |= ARM64MCExpr::VK_NC;
|
||||
|
||||
const MCExpr *Expr =
|
||||
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
|
||||
if (!MO.isJTI() && MO.getOffset())
|
||||
Expr = MCBinaryExpr::CreateAdd(
|
||||
Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
|
||||
|
||||
ARM64MCExpr::VariantKind RefKind;
|
||||
RefKind = static_cast<ARM64MCExpr::VariantKind>(RefFlags);
|
||||
Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx);
|
||||
|
||||
return MCOperand::CreateExpr(Expr);
|
||||
}
|
||||
|
||||
MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
|
||||
MCSymbol *Sym) const {
|
||||
if (TargetTriple.isOSDarwin())
|
||||
return lowerSymbolOperandDarwin(MO, Sym);
|
||||
|
||||
assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target");
|
||||
return lowerSymbolOperandELF(MO, Sym);
|
||||
}
|
||||
|
||||
bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO,
|
||||
MCOperand &MCOp) const {
|
||||
switch (MO.getType()) {
|
||||
default:
|
||||
assert(0 && "unknown operand type");
|
||||
case MachineOperand::MO_Register:
|
||||
// Ignore all implicit register operands.
|
||||
if (MO.isImplicit())
|
||||
return false;
|
||||
MCOp = MCOperand::CreateReg(MO.getReg());
|
||||
break;
|
||||
case MachineOperand::MO_RegisterMask:
|
||||
// Regmasks are like implicit defs.
|
||||
return false;
|
||||
case MachineOperand::MO_Immediate:
|
||||
MCOp = MCOperand::CreateImm(MO.getImm());
|
||||
break;
|
||||
case MachineOperand::MO_MachineBasicBlock:
|
||||
MCOp = MCOperand::CreateExpr(
|
||||
MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
|
||||
break;
|
||||
case MachineOperand::MO_GlobalAddress:
|
||||
MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
|
||||
break;
|
||||
case MachineOperand::MO_ExternalSymbol:
|
||||
MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
|
||||
break;
|
||||
case MachineOperand::MO_JumpTableIndex:
|
||||
MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
|
||||
break;
|
||||
case MachineOperand::MO_ConstantPoolIndex:
|
||||
MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
|
||||
break;
|
||||
case MachineOperand::MO_BlockAddress:
|
||||
MCOp = LowerSymbolOperand(
|
||||
MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
|
||||
OutMI.setOpcode(MI->getOpcode());
|
||||
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
MCOperand MCOp;
|
||||
if (lowerOperand(MI->getOperand(i), MCOp))
|
||||
OutMI.addOperand(MCOp);
|
||||
}
|
||||
}
|
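As a rough usage sketch, an asm printer drives the lowering helper above roughly as follows. The member name MCInstLowering and the exact streamer call are assumptions; the real caller (ARM64AsmPrinter.cpp) appears elsewhere in this commit.

// Hypothetical caller sketch: lower one MachineInstr and emit it.
void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);    // ARM64MCInstLower member (assumed name)
  OutStreamer.EmitInstruction(TmpInst); // exact streamer API varies by revision
}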
52
lib/Target/ARM64/ARM64MCInstLower.h
Normal file
@ -0,0 +1,52 @@
|
||||
//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64_MCINSTLOWER_H
|
||||
#define ARM64_MCINSTLOWER_H
|
||||
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
|
||||
namespace llvm {
|
||||
class AsmPrinter;
|
||||
class MCAsmInfo;
|
||||
class MCContext;
|
||||
class MCInst;
|
||||
class MCOperand;
|
||||
class MCSymbol;
|
||||
class MachineInstr;
|
||||
class MachineModuleInfoMachO;
|
||||
class MachineOperand;
|
||||
class Mangler;
|
||||
|
||||
/// ARM64MCInstLower - This class is used to lower a MachineInstr
|
||||
/// into an MCInst.
|
||||
class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower {
|
||||
MCContext &Ctx;
|
||||
AsmPrinter &Printer;
|
||||
Triple TargetTriple;
|
||||
|
||||
public:
|
||||
ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer);
|
||||
|
||||
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
|
||||
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
|
||||
|
||||
MCOperand lowerSymbolOperandDarwin(const MachineOperand &MO,
|
||||
MCSymbol *Sym) const;
|
||||
MCOperand lowerSymbolOperandELF(const MachineOperand &MO,
|
||||
MCSymbol *Sym) const;
|
||||
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
|
||||
|
||||
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
|
||||
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
126
lib/Target/ARM64/ARM64MachineFunctionInfo.h
Normal file
@ -0,0 +1,126 @@
|
||||
//===- ARM64MachineFunctionInfo.h - ARM64 machine function info -*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares ARM64-specific per-machine-function information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64MACHINEFUNCTIONINFO_H
|
||||
#define ARM64MACHINEFUNCTIONINFO_H
|
||||
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/MC/MCLinkerOptimizationHint.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and
|
||||
/// contains private ARM64-specific information for each MachineFunction.
|
||||
class ARM64FunctionInfo : public MachineFunctionInfo {
|
||||
|
||||
/// HasStackFrame - True if this function has a stack frame. Set by
|
||||
/// processFunctionBeforeCalleeSavedScan().
|
||||
bool HasStackFrame;
|
||||
|
||||
/// \brief Amount of stack frame size, not including callee-saved registers.
|
||||
unsigned LocalStackSize;
|
||||
|
||||
/// \brief Number of TLS accesses using the special (combinable)
|
||||
/// _TLS_MODULE_BASE_ symbol.
|
||||
unsigned NumLocalDynamicTLSAccesses;
|
||||
|
||||
/// \brief FrameIndex for start of varargs area for arguments passed on the
|
||||
/// stack.
|
||||
int VarArgsStackIndex;
|
||||
|
||||
/// \brief FrameIndex for start of varargs area for arguments passed in
|
||||
/// general purpose registers.
|
||||
int VarArgsGPRIndex;
|
||||
|
||||
/// \brief Size of the varargs area for arguments passed in general purpose
|
||||
/// registers.
|
||||
unsigned VarArgsGPRSize;
|
||||
|
||||
/// \brief FrameIndex for start of varargs area for arguments passed in
|
||||
/// floating-point registers.
|
||||
int VarArgsFPRIndex;
|
||||
|
||||
/// \brief Size of the varargs area for arguments passed in floating-point
|
||||
/// registers.
|
||||
unsigned VarArgsFPRSize;
|
||||
|
||||
public:
|
||||
ARM64FunctionInfo()
|
||||
: HasStackFrame(false), LocalStackSize(0), NumLocalDynamicTLSAccesses(0),
|
||||
VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0),
|
||||
VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
|
||||
|
||||
explicit ARM64FunctionInfo(MachineFunction &MF)
|
||||
: HasStackFrame(false), LocalStackSize(0), NumLocalDynamicTLSAccesses(0),
|
||||
VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0),
|
||||
VarArgsFPRIndex(0), VarArgsFPRSize(0) {
|
||||
(void)MF;
|
||||
}
|
||||
|
||||
bool hasStackFrame() const { return HasStackFrame; }
|
||||
void setHasStackFrame(bool s) { HasStackFrame = s; }
|
||||
|
||||
void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
|
||||
unsigned getLocalStackSize() const { return LocalStackSize; }
|
||||
|
||||
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
|
||||
unsigned getNumLocalDynamicTLSAccesses() const {
|
||||
return NumLocalDynamicTLSAccesses;
|
||||
}
|
||||
|
||||
int getVarArgsStackIndex() const { return VarArgsStackIndex; }
|
||||
void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
|
||||
|
||||
int getVarArgsGPRIndex() const { return VarArgsGPRIndex; }
|
||||
void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; }
|
||||
|
||||
unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; }
|
||||
void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; }
|
||||
|
||||
int getVarArgsFPRIndex() const { return VarArgsFPRIndex; }
|
||||
void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; }
|
||||
|
||||
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
|
||||
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
|
||||
|
||||
typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions;
|
||||
|
||||
const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
|
||||
|
||||
// Shortcuts for LOH related types.
|
||||
typedef LOHDirective<const MachineInstr> MILOHDirective;
|
||||
typedef MILOHDirective::LOHArgs MILOHArgs;
|
||||
|
||||
typedef LOHContainer<const MachineInstr> MILOHContainer;
|
||||
typedef MILOHContainer::LOHDirectives MILOHDirectives;
|
||||
|
||||
const MILOHContainer &getLOHContainer() const { return LOHContainerSet; }
|
||||
|
||||
/// Add a LOH directive of this @p Kind and this @p Args.
|
||||
void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) {
|
||||
LOHContainerSet.addDirective(Kind, Args);
|
||||
for (MILOHArgs::const_iterator It = Args.begin(), EndIt = Args.end();
|
||||
It != EndIt; ++It)
|
||||
LOHRelated.insert(*It);
|
||||
}
|
||||
|
||||
private:
|
||||
// Hold the lists of LOHs.
|
||||
MILOHContainer LOHContainerSet;
|
||||
SetOfInstructions LOHRelated;
|
||||
};
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // ARM64MACHINEFUNCTIONINFO_H
|
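A short consumer sketch for the header above: other ARM64 code reaches this object through MachineFunction::getInfo. The free function below is hypothetical; only the setters it calls are declared in the header.

// Hypothetical sketch: store varargs save-area facts computed during argument
// lowering so that frame lowering can retrieve them later.
static void recordVarArgsGPRArea(MachineFunction &MF, int GPRIdx,
                                 unsigned GPRSize) {
  ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>();
  FuncInfo->setVarArgsGPRIndex(GPRIdx);
  FuncInfo->setVarArgsGPRSize(GPRSize);
}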
6586
lib/Target/ARM64/ARM64PerfectShuffle.h
Normal file
File diff suppressed because it is too large
588
lib/Target/ARM64/ARM64PromoteConstant.cpp
Normal file
@ -0,0 +1,588 @@
|
||||
|
||||
//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM64PromoteConstant pass, which promotes constants
|
||||
// to global variables when this is likely to be more efficient.
|
||||
// Currently only types related to constant vectors (i.e., constant vectors, arrays
|
||||
// of constant vectors, constant structure with a constant vector field, etc.)
|
||||
// are promoted to global variables.
|
||||
// Indeed, constant vectors are likely to be lowered into the target constant pool
|
||||
// during instruction selection.
|
||||
// Therefore, the access will remain the same (memory load), but the structure
|
||||
// types are not split into different constant pool accesses for each field.
|
||||
// The bonus side effect is that created globals may be merged by the global
|
||||
// merge pass.
|
||||
//
|
||||
// FIXME: This pass may be useful for other targets too.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-promote-const"
|
||||
#include "ARM64.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/InlineAsm.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Stress testing mode - disable heuristics.
|
||||
static cl::opt<bool> Stress("arm64-stress-promote-const", cl::Hidden,
|
||||
cl::desc("Promote all vector constants"));
|
||||
|
||||
STATISTIC(NumPromoted, "Number of promoted constants");
|
||||
STATISTIC(NumPromotedUses, "Number of promoted constants uses");
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM64PromoteConstant
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
/// Promotes interesting constants into global variables.
|
||||
/// The motivating example is:
|
||||
/// static const uint16_t TableA[32] = {
|
||||
/// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768,
|
||||
/// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215,
|
||||
/// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846,
|
||||
/// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725,
|
||||
/// };
|
||||
///
|
||||
/// uint8x16x4_t LoadStatic(void) {
|
||||
/// uint8x16x4_t ret;
|
||||
/// ret.val[0] = vld1q_u16(TableA + 0);
|
||||
/// ret.val[1] = vld1q_u16(TableA + 8);
|
||||
/// ret.val[2] = vld1q_u16(TableA + 16);
|
||||
/// ret.val[3] = vld1q_u16(TableA + 24);
|
||||
/// return ret;
|
||||
/// }
|
||||
///
|
||||
/// The constants in that example are folded into the uses. Thus, 4 different
|
||||
/// constants are created.
|
||||
/// As their type is a vector, the cheapest way to create them is to load them
|
||||
/// from memory.
|
||||
/// Therefore the final assembly has 4 different loads.
|
||||
/// With this pass enabled, only one load is issued for the constants.
|
||||
class ARM64PromoteConstant : public ModulePass {
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
ARM64PromoteConstant() : ModulePass(ID) {}
|
||||
|
||||
virtual const char *getPassName() const { return "ARM64 Promote Constant"; }
|
||||
|
||||
/// Iterate over the functions and promote the interesting constants into
|
||||
/// global variables with module scope.
|
||||
bool runOnModule(Module &M) {
|
||||
DEBUG(dbgs() << getPassName() << '\n');
|
||||
bool Changed = false;
|
||||
for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
|
||||
++IFn) {
|
||||
Changed |= runOnFunction(*IFn);
|
||||
}
|
||||
return Changed;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Look for interesting constants used within the given function.
|
||||
/// Promote them into global variables, load these global variables within
|
||||
/// the related function, so that the number of inserted loads is minimal.
|
||||
bool runOnFunction(Function &F);
|
||||
|
||||
// This transformation requires dominator info
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
}
|
||||
|
||||
/// Type to store a list of User
|
||||
typedef SmallVector<Value::user_iterator, 4> Users;
|
||||
/// Map an insertion point to all the uses it dominates.
|
||||
typedef DenseMap<Instruction *, Users> InsertionPoints;
|
||||
/// Map a function to the required insertion point of load for a
|
||||
/// global variable
|
||||
typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
|
||||
|
||||
/// Find the closest point that dominates the given Use.
|
||||
Instruction *findInsertionPoint(Value::user_iterator &Use);
|
||||
|
||||
/// Check if the given insertion point is dominated by an existing
|
||||
/// insertion point.
|
||||
/// If true, the given use is added to the list of dominated uses for
|
||||
/// the related existing point.
|
||||
/// \param NewPt the insertion point to be checked
|
||||
/// \param UseIt the use to be added into the list of dominated uses
|
||||
/// \param InsertPts existing insertion points
|
||||
/// \pre NewPt and all instruction in InsertPts belong to the same function
|
||||
/// \return true if one of the insertion points in InsertPts dominates NewPt,
|
||||
/// false otherwise
|
||||
bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
|
||||
InsertionPoints &InsertPts);
|
||||
|
||||
/// Check if the given insertion point can be merged with an existing
|
||||
/// insertion point in a common dominator.
|
||||
/// If true, the given use is added to the list of the created insertion
|
||||
/// point.
|
||||
/// \param NewPt the insertion point to be checked
|
||||
/// \param UseIt the use to be added into the list of dominated uses
|
||||
/// \param InsertPts existing insertion points
|
||||
/// \pre NewPt and all instruction in InsertPts belong to the same function
|
||||
/// \pre isDominated returns false for the exact same parameters.
|
||||
/// \return true if there exists an insertion point in InsertPts that could
|
||||
/// have been merged with NewPt in a common dominator,
|
||||
/// false otherwise
|
||||
bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
|
||||
InsertionPoints &InsertPts);
|
||||
|
||||
/// Compute the minimal insertion points to dominate all the interesting
|
||||
/// uses of value.
|
||||
/// Insertion points are grouped per function and each insertion point
|
||||
/// contains a list of all the uses it dominates within the related function
|
||||
/// \param Val constant to be examined
|
||||
/// \param InsPtsPerFunc[out] output storage of the analysis
|
||||
void computeInsertionPoints(Constant *Val,
|
||||
InsertionPointsPerFunc &InsPtsPerFunc);
|
||||
|
||||
/// Insert a definition of a new global variable at each point contained in
|
||||
/// InsPtsPerFunc and update the related uses (also contained in
|
||||
/// InsPtsPerFunc).
|
||||
bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc);
|
||||
|
||||
/// Compute the minimal insertion points to dominate all the interesting
|
||||
/// uses of Val and insert a definition of a new global variable
|
||||
/// at these points.
|
||||
/// Also update the uses of Val accordingly.
|
||||
/// Currently a use of Val is considered interesting if:
|
||||
/// - Val is not UndefValue
|
||||
/// - Val is not zeroinitialized
|
||||
/// - Replacing Val with a load of a global variable is valid.
|
||||
/// \see shouldConvert for more details
|
||||
bool computeAndInsertDefinitions(Constant *Val);
|
||||
|
||||
/// Promote the given constant into a global variable if it is expected to
|
||||
/// be profitable.
|
||||
/// \return true if Cst has been promoted
|
||||
bool promoteConstant(Constant *Cst);
|
||||
|
||||
/// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
|
||||
/// Append UseIt to this list and delete the entry of IPI in InsertPts.
|
||||
static void appendAndTransferDominatedUses(Instruction *NewPt,
|
||||
Value::user_iterator &UseIt,
|
||||
InsertionPoints::iterator &IPI,
|
||||
InsertionPoints &InsertPts) {
|
||||
// Record the dominated use
|
||||
IPI->second.push_back(UseIt);
|
||||
// Transfer the dominated uses of IPI to NewPt
|
||||
// Inserting into the DenseMap may invalidate existing iterator.
|
||||
// Keep a copy of the key to find the iterator to erase.
|
||||
Instruction *OldInstr = IPI->first;
|
||||
InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
|
||||
// Erase IPI
|
||||
IPI = InsertPts.find(OldInstr);
|
||||
InsertPts.erase(IPI);
|
||||
}
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
char ARM64PromoteConstant::ID = 0;
|
||||
|
||||
namespace llvm {
|
||||
void initializeARM64PromoteConstantPass(PassRegistry &);
|
||||
}
|
||||
|
||||
INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const",
|
||||
"ARM64 Promote Constant Pass", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const",
|
||||
"ARM64 Promote Constant Pass", false, false)
|
||||
|
||||
ModulePass *llvm::createARM64PromoteConstantPass() {
|
||||
return new ARM64PromoteConstant();
|
||||
}
|
||||
|
||||
/// Check if the given type uses a vector type.
|
||||
static bool isConstantUsingVectorTy(const Type *CstTy) {
|
||||
if (CstTy->isVectorTy())
|
||||
return true;
|
||||
if (CstTy->isStructTy()) {
|
||||
for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements();
|
||||
EltIdx < EndEltIdx; ++EltIdx)
|
||||
if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx)))
|
||||
return true;
|
||||
} else if (CstTy->isArrayTy())
|
||||
return isConstantUsingVectorTy(CstTy->getArrayElementType());
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Check if the given use (Instruction + OpIdx) of Cst should be converted into
|
||||
/// a load of a global variable initialized with Cst.
|
||||
/// A use should be converted if it is legal to do so.
|
||||
/// For instance, it is not legal to turn the mask operand of a shuffle vector
|
||||
/// into a load of a global variable.
|
||||
static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
|
||||
unsigned OpIdx) {
|
||||
// shufflevector instruction expects a const for the mask argument, i.e., the
|
||||
// third argument. Do not promote this use in that case.
|
||||
if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2)
|
||||
return false;
|
||||
|
||||
// extractvalue instruction expects a const idx
|
||||
if (isa<const ExtractValueInst>(Instr) && OpIdx > 0)
|
||||
return false;
|
||||
|
||||
// insertvalue instruction expects a const idx
|
||||
if (isa<const InsertValueInst>(Instr) && OpIdx > 1)
|
||||
return false;
|
||||
|
||||
if (isa<const AllocaInst>(Instr) && OpIdx > 0)
|
||||
return false;
|
||||
|
||||
// Alignment argument must be constant
|
||||
if (isa<const LoadInst>(Instr) && OpIdx > 0)
|
||||
return false;
|
||||
|
||||
// Alignment argument must be constant
|
||||
if (isa<const StoreInst>(Instr) && OpIdx > 1)
|
||||
return false;
|
||||
|
||||
// Index must be constant
|
||||
if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0)
|
||||
return false;
|
||||
|
||||
// Personality function and filters must be constant.
|
||||
// Give up on that instruction.
|
||||
if (isa<const LandingPadInst>(Instr))
|
||||
return false;
|
||||
|
||||
// switch instruction expects constants to compare to
|
||||
if (isa<const SwitchInst>(Instr))
|
||||
return false;
|
||||
|
||||
// Expected address must be a constant
|
||||
if (isa<const IndirectBrInst>(Instr))
|
||||
return false;
|
||||
|
||||
// Do not mess with intrinsic
|
||||
if (isa<const IntrinsicInst>(Instr))
|
||||
return false;
|
||||
|
||||
// Do not mess with inline asm
|
||||
const CallInst *CI = dyn_cast<const CallInst>(Instr);
|
||||
if (CI && isa<const InlineAsm>(CI->getCalledValue()))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Check if the given Cst should be converted into
|
||||
/// a load of a global variable initialized with Cst.
|
||||
/// A constant should be converted if it is likely that the materialization of
|
||||
/// the constant will be tricky. Thus, we give up on zero or undef values.
|
||||
///
|
||||
/// \todo Currently, accept only vector related types.
|
||||
/// Also we give up on all simple vector types to keep the existing
|
||||
/// behavior. Otherwise, we should push here all the checks of the lowering of
|
||||
/// BUILD_VECTOR. By giving up, we lose the potential benefit of merging
|
||||
/// constants via global merge and the fact that the same constant is stored
|
||||
/// only once with this method (versus once per function that uses the constant
|
||||
/// with the regular approach, even for float).
|
||||
/// Again, the simplest solution would be to promote every
|
||||
/// constant and rematerialize them when they are actually cheap to create.
|
||||
static bool shouldConvert(const Constant *Cst) {
|
||||
if (isa<const UndefValue>(Cst))
|
||||
return false;
|
||||
|
||||
// FIXME: In some cases, it may be interesting to promote in memory
|
||||
// a zero initialized constant.
|
||||
// E.g., when the type of Cst require more instructions than the
|
||||
// adrp/add/load sequence or when this sequence can be shared by several
|
||||
// instances of Cst.
|
||||
// Ideally, we could promote this into a global and rematerialize the constant
|
||||
// when promotion turns out to have been a bad idea.
|
||||
if (Cst->isZeroValue())
|
||||
return false;
|
||||
|
||||
if (Stress)
|
||||
return true;
|
||||
|
||||
// FIXME: see function \todo
|
||||
if (Cst->getType()->isVectorTy())
|
||||
return false;
|
||||
return isConstantUsingVectorTy(Cst->getType());
|
||||
}
|
||||
|
||||
Instruction *
|
||||
ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
|
||||
// If this user is a phi, the insertion point is in the related
|
||||
// incoming basic block
|
||||
PHINode *PhiInst = dyn_cast<PHINode>(*Use);
|
||||
Instruction *InsertionPoint;
|
||||
if (PhiInst)
|
||||
InsertionPoint =
|
||||
PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
|
||||
else
|
||||
InsertionPoint = dyn_cast<Instruction>(*Use);
|
||||
assert(InsertionPoint && "User is not an instruction!");
|
||||
return InsertionPoint;
|
||||
}
|
||||
|
||||
bool ARM64PromoteConstant::isDominated(Instruction *NewPt,
|
||||
Value::user_iterator &UseIt,
|
||||
InsertionPoints &InsertPts) {
|
||||
|
||||
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
|
||||
*NewPt->getParent()->getParent()).getDomTree();
|
||||
|
||||
// Traverse all the existing insertion points and check if one is dominating
|
||||
// NewPt
|
||||
for (InsertionPoints::iterator IPI = InsertPts.begin(),
|
||||
EndIPI = InsertPts.end();
|
||||
IPI != EndIPI; ++IPI) {
|
||||
if (NewPt == IPI->first || DT.dominates(IPI->first, NewPt) ||
|
||||
// When IPI->first is a terminator instruction, DT may think that
|
||||
// the result is defined on the edge.
|
||||
// Here we are testing the insertion point, not the definition.
|
||||
(IPI->first->getParent() != NewPt->getParent() &&
|
||||
DT.dominates(IPI->first->getParent(), NewPt->getParent()))) {
|
||||
// No need to insert this point
|
||||
// Record the dominated use
|
||||
DEBUG(dbgs() << "Insertion point dominated by:\n");
|
||||
DEBUG(IPI->first->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
IPI->second.push_back(UseIt);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
|
||||
Value::user_iterator &UseIt,
|
||||
InsertionPoints &InsertPts) {
|
||||
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
|
||||
*NewPt->getParent()->getParent()).getDomTree();
|
||||
BasicBlock *NewBB = NewPt->getParent();
|
||||
|
||||
// Traverse all the existing insertion points and check if one is dominated by
|
||||
// NewPt and thus useless or can be combined with NewPt into a common
|
||||
// dominator
|
||||
for (InsertionPoints::iterator IPI = InsertPts.begin(),
|
||||
EndIPI = InsertPts.end();
|
||||
IPI != EndIPI; ++IPI) {
|
||||
BasicBlock *CurBB = IPI->first->getParent();
|
||||
if (NewBB == CurBB) {
|
||||
// Instructions are in the same block.
|
||||
// By construction, NewPt is dominating the other.
|
||||
// Indeed, isDominated returned false with the exact same arguments.
|
||||
DEBUG(dbgs() << "Merge insertion point with:\n");
|
||||
DEBUG(IPI->first->print(dbgs()));
|
||||
DEBUG(dbgs() << "\nat considered insertion point.\n");
|
||||
appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Look for a common dominator
|
||||
BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB);
|
||||
// If none exists, we cannot merge these two points
|
||||
if (!CommonDominator)
|
||||
continue;
|
||||
|
||||
if (CommonDominator != NewBB) {
|
||||
// By construction, the CommonDominator cannot be CurBB
|
||||
assert(CommonDominator != CurBB &&
|
||||
"Instruction has not been rejected during isDominated check!");
|
||||
// Take the last instruction of the CommonDominator as insertion point
|
||||
NewPt = CommonDominator->getTerminator();
|
||||
}
|
||||
// else, CommonDominator is the block of NewBB, hence NewBB is the last
|
||||
// possible insertion point in that block
|
||||
DEBUG(dbgs() << "Merge insertion point with:\n");
|
||||
DEBUG(IPI->first->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
DEBUG(NewPt->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void ARM64PromoteConstant::computeInsertionPoints(
|
||||
Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
|
||||
DEBUG(dbgs() << "** Compute insertion points **\n");
|
||||
for (Value::user_iterator UseIt = Val->user_begin(),
|
||||
EndUseIt = Val->user_end();
|
||||
UseIt != EndUseIt; ++UseIt) {
|
||||
// If the user is not an Instruction, we cannot modify it
|
||||
if (!isa<Instruction>(*UseIt))
|
||||
continue;
|
||||
|
||||
// Filter out uses that should not be converted
|
||||
if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
|
||||
DEBUG((*UseIt)->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
|
||||
Instruction *InsertionPoint = findInsertionPoint(UseIt);
|
||||
|
||||
DEBUG(dbgs() << "Considered insertion point:\n");
|
||||
DEBUG(InsertionPoint->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
|
||||
// Check if the current insertion point is useless, i.e., it is dominated
|
||||
// by another one.
|
||||
InsertionPoints &InsertPts =
|
||||
InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
|
||||
if (isDominated(InsertionPoint, UseIt, InsertPts))
|
||||
continue;
|
||||
// This insertion point is useful, check if we can merge some insertion
|
||||
// point in a common dominator or if NewPt dominates an existing one.
|
||||
if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
|
||||
continue;
|
||||
|
||||
DEBUG(dbgs() << "Keep considered insertion point\n");
|
||||
|
||||
// It is definitely useful on its own
|
||||
InsertPts[InsertionPoint].push_back(UseIt);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
ARM64PromoteConstant::insertDefinitions(Constant *Cst,
|
||||
InsertionPointsPerFunc &InsPtsPerFunc) {
|
||||
// We will create one global variable per Module
|
||||
DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
|
||||
bool HasChanged = false;
|
||||
|
||||
// Traverse all insertion points in all the functions
|
||||
for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
|
||||
EndIt = InsPtsPerFunc.end();
|
||||
FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
|
||||
InsertionPoints &InsertPts = FctToInstPtsIt->second;
|
||||
// Do more check for debug purposes
|
||||
#ifndef NDEBUG
|
||||
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
|
||||
*FctToInstPtsIt->first).getDomTree();
|
||||
#endif
|
||||
GlobalVariable *PromotedGV;
|
||||
assert(!InsertPts.empty() && "Empty uses does not need a definition");
|
||||
|
||||
Module *M = FctToInstPtsIt->first->getParent();
|
||||
DenseMap<Module *, GlobalVariable *>::iterator MapIt =
|
||||
ModuleToMergedGV.find(M);
|
||||
if (MapIt == ModuleToMergedGV.end()) {
|
||||
PromotedGV = new GlobalVariable(
|
||||
*M, Cst->getType(), true, GlobalValue::InternalLinkage, 0,
|
||||
"_PromotedConst", 0, GlobalVariable::NotThreadLocal);
|
||||
PromotedGV->setInitializer(Cst);
|
||||
ModuleToMergedGV[M] = PromotedGV;
|
||||
DEBUG(dbgs() << "Global replacement: ");
|
||||
DEBUG(PromotedGV->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
++NumPromoted;
|
||||
HasChanged = true;
|
||||
} else {
|
||||
PromotedGV = MapIt->second;
|
||||
}
|
||||
|
||||
for (InsertionPoints::iterator IPI = InsertPts.begin(),
|
||||
EndIPI = InsertPts.end();
|
||||
IPI != EndIPI; ++IPI) {
|
||||
// Create the load of the global variable
|
||||
IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
|
||||
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
|
||||
DEBUG(dbgs() << "**********\n");
|
||||
DEBUG(dbgs() << "New def: ");
|
||||
DEBUG(LoadedCst->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
|
||||
// Update the dominated uses
|
||||
Users &DominatedUsers = IPI->second;
|
||||
for (Users::iterator UseIt = DominatedUsers.begin(),
|
||||
EndIt = DominatedUsers.end();
|
||||
UseIt != EndIt; ++UseIt) {
|
||||
#ifndef NDEBUG
|
||||
assert((DT.dominates(LoadedCst, cast<Instruction>(**UseIt)) ||
|
||||
(isa<PHINode>(**UseIt) &&
|
||||
DT.dominates(LoadedCst, findInsertionPoint(*UseIt)))) &&
|
||||
"Inserted definition does not dominate all its uses!");
|
||||
#endif
|
||||
DEBUG(dbgs() << "Use to update " << UseIt->getOperandNo() << ":");
|
||||
DEBUG((*UseIt)->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
(*UseIt)->setOperand(UseIt->getOperandNo(), LoadedCst);
|
||||
++NumPromotedUses;
|
||||
}
|
||||
}
|
||||
}
|
||||
return HasChanged;
|
||||
}
|
||||
|
||||
bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
|
||||
InsertionPointsPerFunc InsertPtsPerFunc;
|
||||
computeInsertionPoints(Val, InsertPtsPerFunc);
|
||||
return insertDefinitions(Val, InsertPtsPerFunc);
|
||||
}
|
||||
|
||||
bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
|
||||
assert(Cst && "Given variable is not a valid constant.");
|
||||
|
||||
if (!shouldConvert(Cst))
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "******************************\n");
|
||||
DEBUG(dbgs() << "Candidate constant: ");
|
||||
DEBUG(Cst->print(dbgs()));
|
||||
DEBUG(dbgs() << '\n');
|
||||
|
||||
return computeAndInsertDefinitions(Cst);
|
||||
}
|
||||
|
||||
bool ARM64PromoteConstant::runOnFunction(Function &F) {
|
||||
// Look for instructions using constant vectors.
|
||||
// Promote that constant to a global variable.
|
||||
// Create as few loads of this variable as possible and update the uses
|
||||
// accordingly
|
||||
bool LocalChange = false;
|
||||
SmallSet<Constant *, 8> AlreadyChecked;
|
||||
|
||||
for (Function::iterator IBB = F.begin(), IEndBB = F.end(); IBB != IEndBB;
|
||||
++IBB) {
|
||||
for (BasicBlock::iterator II = IBB->begin(), IEndI = IBB->end();
|
||||
II != IEndI; ++II) {
|
||||
// Traverse the operand, looking for constant vectors
|
||||
// Replace them by a load of a global variable of type constant vector
|
||||
for (unsigned OpIdx = 0, EndOpIdx = II->getNumOperands();
|
||||
OpIdx != EndOpIdx; ++OpIdx) {
|
||||
Constant *Cst = dyn_cast<Constant>(II->getOperand(OpIdx));
|
||||
// There is no point in promoting global values; they are already global.
|
||||
// Do not promote constant expressions, as they may require some code
|
||||
// expansion.
|
||||
if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
|
||||
AlreadyChecked.insert(Cst))
|
||||
LocalChange |= promoteConstant(Cst);
|
||||
}
|
||||
}
|
||||
}
|
||||
return LocalChange;
|
||||
}
|
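For context, here is a minimal sketch of how the module pass above would be added to the codegen IR pipeline. The ARM64PassConfig::addIRPasses override and the opt-level guard are assumptions for illustration; the actual registration is handled in ARM64TargetMachine.cpp.

// Hypothetical sketch: run constant promotion before instruction selection,
// but only when optimizing.
void ARM64PassConfig::addIRPasses() {             // override point is assumed
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createARM64PromoteConstantPass());
  TargetPassConfig::addIRPasses();
}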
402
lib/Target/ARM64/ARM64RegisterInfo.cpp
Normal file
@ -0,0 +1,402 @@
|
||||
//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the ARM64 implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM64RegisterInfo.h"
|
||||
#include "ARM64FrameLowering.h"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "ARM64Subtarget.h"
|
||||
#include "MCTargetDesc/ARM64AddressingModes.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/RegisterScavenging.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
#define GET_REGINFO_TARGET_DESC
|
||||
#include "ARM64GenRegisterInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii,
|
||||
const ARM64Subtarget *sti)
|
||||
: ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {}
|
||||
|
||||
const uint16_t *
|
||||
ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
assert(MF && "Invalid MachineFunction pointer.");
|
||||
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
|
||||
return CSR_ARM64_AllRegs_SaveList;
|
||||
else
|
||||
return CSR_ARM64_AAPCS_SaveList;
|
||||
}
|
||||
|
||||
const uint32_t *
|
||||
ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
|
||||
if (CC == CallingConv::AnyReg)
|
||||
return CSR_ARM64_AllRegs_RegMask;
|
||||
else
|
||||
return CSR_ARM64_AAPCS_RegMask;
|
||||
}
|
||||
|
||||
const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const {
|
||||
if (STI->isTargetDarwin())
|
||||
return CSR_ARM64_TLS_Darwin_RegMask;
|
||||
|
||||
assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
|
||||
return CSR_ARM64_TLS_ELF_RegMask;
|
||||
}
|
||||
|
||||
const uint32_t *
|
||||
ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
|
||||
// This should return a register mask that is the same as that returned by
|
||||
// getCallPreservedMask but that additionally preserves the register used for
|
||||
// the first i64 argument (which must also be the register used to return a
|
||||
// single i64 return value)
|
||||
//
|
||||
// In case that the calling convention does not use the same register for
|
||||
// both, the function should return NULL (does not currently apply)
|
||||
return CSR_ARM64_AAPCS_ThisReturn_RegMask;
|
||||
}
|
||||
|
||||
BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
|
||||
|
||||
// FIXME: avoid re-calculating this every time.
|
||||
BitVector Reserved(getNumRegs());
|
||||
Reserved.set(ARM64::SP);
|
||||
Reserved.set(ARM64::XZR);
|
||||
Reserved.set(ARM64::WSP);
|
||||
Reserved.set(ARM64::WZR);
|
||||
|
||||
if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
|
||||
Reserved.set(ARM64::FP);
|
||||
Reserved.set(ARM64::W29);
|
||||
}
|
||||
|
||||
if (STI->isTargetDarwin()) {
|
||||
Reserved.set(ARM64::X18); // Platform register
|
||||
Reserved.set(ARM64::W18);
|
||||
}
|
||||
|
||||
if (hasBasePointer(MF)) {
|
||||
Reserved.set(ARM64::X19);
|
||||
Reserved.set(ARM64::W19);
|
||||
}
|
||||
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF,
|
||||
unsigned Reg) const {
|
||||
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
|
||||
|
||||
switch (Reg) {
|
||||
default:
|
||||
break;
|
||||
case ARM64::SP:
|
||||
case ARM64::XZR:
|
||||
case ARM64::WSP:
|
||||
case ARM64::WZR:
|
||||
return true;
|
||||
case ARM64::X18:
|
||||
case ARM64::W18:
|
||||
return STI->isTargetDarwin();
|
||||
case ARM64::FP:
|
||||
case ARM64::W29:
|
||||
return TFI->hasFP(MF) || STI->isTargetDarwin();
|
||||
case ARM64::W19:
|
||||
case ARM64::X19:
|
||||
return hasBasePointer(MF);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *
|
||||
ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
|
||||
unsigned Kind) const {
|
||||
return &ARM64::GPR64RegClass;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *
|
||||
ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
|
||||
if (RC == &ARM64::CCRRegClass)
|
||||
return NULL; // Can't copy CPSR.
|
||||
return RC;
|
||||
}
|
||||
|
||||
unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; }
|
||||
|
||||
bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
// In the presence of variable sized objects, if the fixed stack size is
|
||||
// large enough that referencing from the FP won't result in things being
|
||||
// in range relatively often, we can use a base pointer to allow access
|
||||
// from the other direction like the SP normally works.
|
||||
if (MFI->hasVarSizedObjects()) {
|
||||
// Conservatively estimate whether the negative offset from the frame
|
||||
// pointer will be sufficient to reach. If a function has a smallish
|
||||
// frame, it's less likely to have lots of spills and callee saved
|
||||
// space, so it's all more likely to be within range of the frame pointer.
|
||||
// If it's wrong, we'll materialize the constant and still get to the
|
||||
// object; it's just suboptimal. Negative offsets use the unscaled
|
||||
// load/store instructions, which have a 9-bit signed immediate.
|
||||
if (MFI->getLocalFrameSize() < 256)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
|
||||
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
|
||||
|
||||
return TFI->hasFP(MF) ? ARM64::FP : ARM64::SP;
|
||||
}
|
||||
|
||||
bool
|
||||
ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF)
|
||||
const {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
// ARM64FrameLowering::resolveFrameIndexReference() can always fall back
|
||||
// to the stack pointer, so only put the emergency spill slot next to the
|
||||
// FP when there's no better way to access it (SP or base pointer).
|
||||
return MFI->hasVarSizedObjects() && !hasBasePointer(MF);
|
||||
}
|
||||
|
||||
bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF)
|
||||
const {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
// Only consider eliminating leaf frames.
|
||||
if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) &&
|
||||
MFI->adjustsStack()))
|
||||
return true;
|
||||
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
|
||||
}
|
||||
|
||||
/// needsFrameBaseReg - Returns true if the instruction's frame index
|
||||
/// reference would be better served by a base register other than FP
|
||||
/// or SP. Used by LocalStackFrameAllocation to determine which frame index
|
||||
/// references it should create new base registers for.
|
||||
bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
|
||||
int64_t Offset) const {
|
||||
for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
|
||||
assert(i < MI->getNumOperands() &&
|
||||
"Instr doesn't have FrameIndex operand!");
|
||||
|
||||
// It's the load/store FI references that cause issues, as it can be difficult
|
||||
// to materialize the offset if it won't fit in the literal field. Estimate
|
||||
// based on the size of the local frame and some conservative assumptions
|
||||
// about the rest of the stack frame (note, this is pre-regalloc, so
|
||||
// we don't know everything for certain yet) whether this offset is likely
|
||||
// to be out of range of the immediate. Return true if so.
|
||||
|
||||
// We only generate virtual base registers for loads and stores, so
|
||||
// return false for everything else.
|
||||
if (!MI->mayLoad() && !MI->mayStore())
|
||||
return false;
|
||||
|
||||
// Without a virtual base register, if the function has variable sized
|
||||
// objects, all fixed-size local references will be via the frame pointer.
|
||||
// Approximate the offset and see if it's legal for the instruction.
|
||||
// Note that the incoming offset is based on the SP value at function entry,
|
||||
// so it'll be negative.
|
||||
MachineFunction &MF = *MI->getParent()->getParent();
|
||||
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
// Estimate an offset from the frame pointer.
|
||||
// Conservatively assume all GPR callee-saved registers get pushed.
|
||||
// FP, LR, X19-X28, D8-D15. 64-bits each.
|
||||
int64_t FPOffset = Offset - 16 * 20;
|
||||
// Estimate an offset from the stack pointer.
|
||||
// The incoming offset is relating to the SP at the start of the function,
|
||||
// but when we access the local it'll be relative to the SP after local
|
||||
// allocation, so adjust our SP-relative offset by that allocation size.
|
||||
Offset += MFI->getLocalFrameSize();
|
||||
// Assume that we'll have at least some spill slots allocated.
|
||||
// FIXME: This is a total SWAG number. We should run some statistics
|
||||
// and pick a real one.
|
||||
Offset += 128; // 128 bytes of spill slots
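// Illustrative walk-through (hypothetical numbers, not from this commit):
// for a fixed object at an entry-SP offset of -24 in a function with a
// 512-byte local frame, the FP-relative estimate is -24 - 320 = -344 and
// the SP-relative estimate becomes -24 + 512 + 128 = 616; each estimate is
// then checked with isFrameOffsetLegal() below.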
|
||||
|
||||
// If there is a frame pointer, try using it.
|
||||
// The FP is only available if there is no dynamic realignment. We
|
||||
// don't know for sure yet whether we'll need that, so we guess based
|
||||
// on whether there are any local variables that would trigger it.
|
||||
if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
|
||||
return false;
|
||||
|
||||
// If we can reference via the stack pointer or base pointer, try that.
|
||||
// FIXME: This (and the code that resolves the references) can be improved
|
||||
// to only disallow SP relative references in the live range of
|
||||
// the VLA(s). In practice, it's unclear how much difference that
|
||||
// would make, but it may be worth doing.
|
||||
if (isFrameOffsetLegal(MI, Offset))
|
||||
return false;
|
||||
|
||||
// The offset likely isn't legal; we want to allocate a virtual base register.
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
|
||||
int64_t Offset) const {
|
||||
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
|
||||
assert(MI && "Unable to get the legal offset for nil instruction.");
|
||||
int SaveOffset = Offset;
|
||||
return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal;
|
||||
}
|
||||
|
||||
/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
|
||||
/// at the beginning of the basic block.
|
||||
void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
|
||||
unsigned BaseReg,
|
||||
int FrameIdx,
|
||||
int64_t Offset) const {
|
||||
MachineBasicBlock::iterator Ins = MBB->begin();
|
||||
DebugLoc DL; // Defaults to "unknown"
|
||||
if (Ins != MBB->end())
|
||||
DL = Ins->getDebugLoc();
|
||||
|
||||
const MCInstrDesc &MCID = TII->get(ARM64::ADDXri);
|
||||
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
|
||||
const MachineFunction &MF = *MBB->getParent();
|
||||
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
|
||||
unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
|
||||
|
||||
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
|
||||
.addFrameIndex(FrameIdx)
|
||||
.addImm(Offset)
|
||||
.addImm(Shifter);
|
||||
}
|
||||
|
||||
void ARM64RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
|
||||
unsigned BaseReg,
|
||||
int64_t Offset) const {
|
||||
MachineInstr &MI = *I;
|
||||
int Off = Offset; // ARM64 doesn't need the general 64-bit offsets
|
||||
unsigned i = 0;
|
||||
|
||||
while (!MI.getOperand(i).isFI()) {
|
||||
++i;
|
||||
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
|
||||
}
|
||||
bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII);
|
||||
assert(Done && "Unable to resolve frame index!");
|
||||
(void)Done;
|
||||
}
|
||||
|
||||
void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger *RS) const {
|
||||
assert(SPAdj == 0 && "Unexpected");
|
||||
|
||||
MachineInstr &MI = *II;
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const ARM64FrameLowering *TFI = static_cast<const ARM64FrameLowering *>(
|
||||
MF.getTarget().getFrameLowering());
|
||||
|
||||
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
|
||||
unsigned FrameReg;
|
||||
int Offset;
|
||||
|
||||
// Special handling of dbg_value, stackmap and patchpoint instructions.
|
||||
if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
|
||||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
|
||||
Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
|
||||
/*PreferFP=*/true);
|
||||
Offset += MI.getOperand(FIOperandNum + 1).getImm();
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
|
||||
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
|
||||
return;
|
||||
}
|
||||
|
||||
// Modify MI as necessary to handle as much of 'Offset' as possible
|
||||
Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
|
||||
if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
|
||||
return;
|
||||
|
||||
assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
|
||||
"Emergency spill slot is out of reach");
|
||||
|
||||
// If we get here, the immediate doesn't fit into the instruction. We folded
|
||||
// as much as possible above. Handle the rest, providing a register that is
|
||||
// SP+LargeImm.
|
||||
unsigned ScratchReg =
|
||||
MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass);
|
||||
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
||||
unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
|
||||
MachineFunction &MF) const {
|
||||
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
|
||||
|
||||
switch (RC->getID()) {
|
||||
default:
|
||||
return 0;
|
||||
case ARM64::GPR32RegClassID:
|
||||
case ARM64::GPR32spRegClassID:
|
||||
case ARM64::GPR32allRegClassID:
|
||||
case ARM64::GPR64spRegClassID:
|
||||
case ARM64::GPR64allRegClassID:
|
||||
case ARM64::GPR64RegClassID:
|
||||
case ARM64::GPR32commonRegClassID:
|
||||
case ARM64::GPR64commonRegClassID:
|
||||
return 32 - 1 // XZR/SP
|
||||
- (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
|
||||
- STI->isTargetDarwin() // X18 reserved as platform register
|
||||
- hasBasePointer(MF); // X19
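// For example (an illustrative count, not from this commit): on Darwin with
// a frame pointer and a base pointer this evaluates to
// 32 - 1 - 1 - 1 - 1 = 28 allocatable general-purpose registers.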
|
||||
case ARM64::FPR8RegClassID:
|
||||
case ARM64::FPR16RegClassID:
|
||||
case ARM64::FPR32RegClassID:
|
||||
case ARM64::FPR64RegClassID:
|
||||
case ARM64::FPR128RegClassID:
|
||||
return 32;
|
||||
|
||||
case ARM64::DDRegClassID:
|
||||
case ARM64::DDDRegClassID:
|
||||
case ARM64::DDDDRegClassID:
|
||||
case ARM64::QQRegClassID:
|
||||
case ARM64::QQQRegClassID:
|
||||
case ARM64::QQQQRegClassID:
|
||||
return 32;
|
||||
|
||||
case ARM64::FPR128_loRegClassID:
|
||||
return 16;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace llvm
|
89
lib/Target/ARM64/ARM64RegisterInfo.h
Normal file
@ -0,0 +1,89 @@
|
||||
//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the ARM64 implementation of the MRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_ARM64REGISTERINFO_H
|
||||
#define LLVM_TARGET_ARM64REGISTERINFO_H
|
||||
|
||||
#define GET_REGINFO_HEADER
|
||||
#include "ARM64GenRegisterInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class ARM64InstrInfo;
|
||||
class ARM64Subtarget;
|
||||
class MachineFunction;
|
||||
class RegScavenger;
|
||||
class TargetRegisterClass;
|
||||
|
||||
struct ARM64RegisterInfo : public ARM64GenRegisterInfo {
|
||||
private:
|
||||
const ARM64InstrInfo *TII;
|
||||
const ARM64Subtarget *STI;
|
||||
|
||||
public:
|
||||
ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti);
|
||||
|
||||
/// Code Generation virtual methods...
|
||||
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
|
||||
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
|
||||
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
|
||||
|
||||
// Calls involved in thread-local variable lookup save more registers than
|
||||
// normal calls, so they need a different mask to represent this.
|
||||
const uint32_t *getTLSCallPreservedMask() const;
|
||||
|
||||
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
|
||||
/// case that 'returned' is on an i64 first argument if the calling convention
|
||||
/// is one that can (partially) model this attribute with a preserved mask
|
||||
/// (i.e. it is a calling convention that uses the same register for the first
|
||||
/// i64 argument and an i64 return value)
|
||||
///
|
||||
/// Should return NULL in the case that the calling convention does not have
|
||||
/// this property
|
||||
const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
|
||||
|
||||
BitVector getReservedRegs(const MachineFunction &MF) const;
|
||||
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
|
||||
unsigned Kind = 0) const;
|
||||
const TargetRegisterClass *
|
||||
getCrossCopyRegClass(const TargetRegisterClass *RC) const;
|
||||
|
||||
bool requiresRegisterScavenging(const MachineFunction &MF) const;
|
||||
bool useFPForScavengingIndex(const MachineFunction &MF) const;
|
||||
bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
|
||||
|
||||
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
|
||||
bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
|
||||
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
|
||||
int FrameIdx, int64_t Offset) const;
|
||||
void resolveFrameIndex(MachineBasicBlock::iterator I, unsigned BaseReg,
|
||||
int64_t Offset) const;
|
||||
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
|
||||
unsigned FIOperandNum,
|
||||
RegScavenger *RS = NULL) const;
|
||||
|
||||
bool cannotEliminateFrame(const MachineFunction &MF) const;
|
||||
bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
|
||||
bool hasBasePointer(const MachineFunction &MF) const;
|
||||
unsigned getBaseRegister() const;
|
||||
|
||||
// Debug information queries.
|
||||
unsigned getFrameRegister(const MachineFunction &MF) const;
|
||||
|
||||
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
|
||||
MachineFunction &MF) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TARGET_ARM64REGISTERINFO_H
|
561
lib/Target/ARM64/ARM64RegisterInfo.td
Normal file
@ -0,0 +1,561 @@
|
||||
//===- ARM64RegisterInfo.td - Describe the ARM64 Registers -*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
class ARM64Reg<bits<16> enc, string n, list<Register> subregs = [],
|
||||
list<string> altNames = []>
|
||||
: Register<n, altNames> {
|
||||
let HWEncoding = enc;
|
||||
let Namespace = "ARM64";
|
||||
let SubRegs = subregs;
|
||||
}
|
||||
|
||||
let Namespace = "ARM64" in {
|
||||
def sub_32 : SubRegIndex<32>;
|
||||
|
||||
def bsub : SubRegIndex<8>;
|
||||
def hsub : SubRegIndex<16>;
|
||||
def ssub : SubRegIndex<32>;
|
||||
def dsub : SubRegIndex<32>;
|
||||
def qhisub : SubRegIndex<64>;
|
||||
def qsub : SubRegIndex<64>;
|
||||
// Note: Code depends on these having consecutive numbers
|
||||
def dsub0 : SubRegIndex<64>;
|
||||
def dsub1 : SubRegIndex<64>;
|
||||
def dsub2 : SubRegIndex<64>;
|
||||
def dsub3 : SubRegIndex<64>;
|
||||
// Note: Code depends on these having consecutive numbers
|
||||
def qsub0 : SubRegIndex<128>;
|
||||
def qsub1 : SubRegIndex<128>;
|
||||
def qsub2 : SubRegIndex<128>;
|
||||
def qsub3 : SubRegIndex<128>;
|
||||
}
|
||||
|
||||
let Namespace = "ARM64" in {
|
||||
def vreg : RegAltNameIndex;
|
||||
def vlist1 : RegAltNameIndex;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>;
|
||||
def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>;
|
||||
def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>;
|
||||
def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>;
|
||||
def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>;
|
||||
def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>;
|
||||
def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>;
|
||||
def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>;
|
||||
def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>;
|
||||
def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>;
|
||||
def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>;
|
||||
def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>;
|
||||
def W12 : ARM64Reg<12, "w12">, DwarfRegNum<[12]>;
|
||||
def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>;
|
||||
def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>;
|
||||
def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>;
|
||||
def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>;
|
||||
def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>;
|
||||
def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>;
|
||||
def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>;
|
||||
def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>;
|
||||
def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>;
|
||||
def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>;
|
||||
def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>;
|
||||
def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>;
|
||||
def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>;
|
||||
def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>;
|
||||
def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>;
|
||||
def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>;
|
||||
def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>;
|
||||
def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>;
|
||||
def WSP : ARM64Reg<31, "wsp">, DwarfRegNum<[31]>;
|
||||
def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias<WSP>;
|
||||
|
||||
let SubRegIndices = [sub_32] in {
|
||||
def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>;
|
||||
def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>;
|
||||
def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>;
|
||||
def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>;
|
||||
def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>;
|
||||
def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>;
|
||||
def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>;
|
||||
def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>;
|
||||
def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>;
|
||||
def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>;
|
||||
def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>;
|
||||
def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>;
|
||||
def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>;
|
||||
def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>;
|
||||
def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>;
|
||||
def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>;
|
||||
def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>;
|
||||
def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>;
|
||||
def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>;
|
||||
def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>;
|
||||
def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>;
|
||||
def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>;
|
||||
def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>;
|
||||
def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>;
|
||||
def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>;
|
||||
def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>;
|
||||
def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>;
|
||||
def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>;
|
||||
def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>;
|
||||
def FP : ARM64Reg<29, "fp", [W29]>, DwarfRegAlias<W29>;
|
||||
def LR : ARM64Reg<30, "lr", [W30]>, DwarfRegAlias<W30>;
|
||||
def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>;
|
||||
def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>;
|
||||
}
|
||||
|
||||
// Condition code register.
|
||||
def CPSR : ARM64Reg<0, "cpsr">;
|
||||
|
||||
// GPR register classes with the intersections of GPR32/GPR32sp and
|
||||
// GPR64/GPR64sp for use by the coalescer.
|
||||
def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> {
|
||||
let AltOrders = [(rotl GPR32common, 8)];
|
||||
let AltOrderSelect = [{ return 1; }];
|
||||
}
|
||||
def GPR64common : RegisterClass<"ARM64", [i64], 64,
|
||||
(add (sequence "X%u", 0, 28), FP, LR)> {
|
||||
let AltOrders = [(rotl GPR64common, 8)];
|
||||
let AltOrderSelect = [{ return 1; }];
|
||||
}
|
||||
// GPR register classes which exclude SP/WSP.
|
||||
def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> {
|
||||
let AltOrders = [(rotl GPR32, 8)];
|
||||
let AltOrderSelect = [{ return 1; }];
|
||||
}
|
||||
def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> {
|
||||
let AltOrders = [(rotl GPR64, 8)];
|
||||
let AltOrderSelect = [{ return 1; }];
|
||||
}
|
||||
|
||||
// GPR register classes which include SP/WSP.
|
||||
def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> {
|
||||
let AltOrders = [(rotl GPR32sp, 8)];
|
||||
let AltOrderSelect = [{ return 1; }];
|
||||
}
|
||||
def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> {
|
||||
let AltOrders = [(rotl GPR64sp, 8)];
|
||||
let AltOrderSelect = [{ return 1; }];
|
||||
}
|
||||
|
||||
// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
|
||||
// constraint used by any instructions; it is used as a common super-class.
|
||||
def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>;
|
||||
def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>;
|
||||
|
||||
// For tail calls, we can't use callee-saved registers, as they are restored
|
||||
// to the saved value before the tail call, which would clobber a call address.
|
||||
// This is for indirect tail calls to store the address of the destination.
|
||||
def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21,
|
||||
X22, X23, X24, X25, X26,
|
||||
X27, X28)>;
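// For example (illustrative, not from this commit): the target address of an
// indirect tail call may live in any of x0-x18 (e.g. "br x16"), but not in a
// callee-saved register such as x19, which is restored from its spill slot
// before the branch and would clobber the call address.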
|
||||
|
||||
// GPR register classes for the post-increment amount of vector load/store that
|
||||
// has alternate printing when Rm=31 and prints a constant immediate value
|
||||
// equal to the total number of bytes transferred.
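// For example (illustrative assembly, not part of this commit):
// "ld1 { v0.16b }, [x0], x2" post-increments by a register, while the Rm=31
// encoding is printed as "ld1 { v0.16b }, [x0], #16", i.e. the 16 bytes
// transferred.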
|
||||
def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand1">;
|
||||
def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand2">;
|
||||
def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand3">;
|
||||
def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand4">;
|
||||
def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand6">;
|
||||
def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand8">;
|
||||
def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand12">;
|
||||
def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand16">;
|
||||
def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand24">;
|
||||
def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand32">;
|
||||
def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand48">;
|
||||
def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand64">;
|
||||
|
||||
// Condition code regclass.
|
||||
def CCR : RegisterClass<"ARM64", [i32], 32, (add CPSR)> {
|
||||
let CopyCost = -1; // Don't allow copying of status registers.
|
||||
|
||||
// CCR is not allocatable.
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating Point Scalar Registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>;
|
||||
def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>;
|
||||
def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>;
|
||||
def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>;
|
||||
def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>;
|
||||
def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>;
|
||||
def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>;
|
||||
def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>;
|
||||
def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>;
|
||||
def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>;
|
||||
def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>;
|
||||
def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>;
|
||||
def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>;
|
||||
def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>;
|
||||
def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>;
|
||||
def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>;
|
||||
def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>;
|
||||
def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>;
|
||||
def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>;
|
||||
def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>;
|
||||
def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>;
|
||||
def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>;
|
||||
def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>;
|
||||
def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>;
|
||||
def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>;
|
||||
def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>;
|
||||
def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>;
|
||||
def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>;
|
||||
def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>;
|
||||
def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>;
|
||||
def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>;
|
||||
def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>;
|
||||
|
||||
let SubRegIndices = [bsub] in {
|
||||
def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>;
|
||||
def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>;
|
||||
def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>;
|
||||
def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>;
|
||||
def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>;
|
||||
def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>;
|
||||
def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>;
|
||||
def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>;
|
||||
def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>;
|
||||
def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>;
|
||||
def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>;
|
||||
def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>;
|
||||
def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>;
|
||||
def H13 : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>;
|
||||
def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>;
|
||||
def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>;
|
||||
def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>;
|
||||
def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>;
|
||||
def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>;
|
||||
def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>;
|
||||
def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>;
|
||||
def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>;
|
||||
def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>;
|
||||
def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>;
|
||||
def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>;
|
||||
def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>;
|
||||
def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>;
|
||||
def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>;
|
||||
def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>;
|
||||
def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>;
|
||||
def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>;
|
||||
def H31 : ARM64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [hsub] in {
|
||||
def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>;
|
||||
def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>;
|
||||
def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>;
|
||||
def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>;
|
||||
def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>;
|
||||
def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>;
|
||||
def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>;
|
||||
def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>;
|
||||
def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>;
|
||||
def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>;
|
||||
def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>;
|
||||
def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>;
|
||||
def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>;
|
||||
def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>;
|
||||
def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>;
|
||||
def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>;
|
||||
def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>;
|
||||
def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>;
|
||||
def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>;
|
||||
def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>;
|
||||
def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>;
|
||||
def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>;
|
||||
def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>;
|
||||
def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>;
|
||||
def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>;
|
||||
def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>;
|
||||
def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>;
|
||||
def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>;
|
||||
def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>;
|
||||
def S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>;
|
||||
def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>;
|
||||
def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in {
|
||||
def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>;
|
||||
def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>;
|
||||
def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>;
|
||||
def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>;
|
||||
def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>;
|
||||
def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>;
|
||||
def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>;
|
||||
def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>;
|
||||
def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>;
|
||||
def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>;
|
||||
def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>;
|
||||
def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>;
|
||||
def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>;
|
||||
def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>;
|
||||
def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>;
|
||||
def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>;
|
||||
def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>;
|
||||
def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>;
|
||||
def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>;
|
||||
def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>;
|
||||
def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>;
|
||||
def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>;
|
||||
def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>;
|
||||
def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>;
|
||||
def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>;
|
||||
def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>;
|
||||
def D26 : ARM64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>;
|
||||
def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>;
|
||||
def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>;
|
||||
def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>;
|
||||
def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>;
|
||||
def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>;
|
||||
}
|
||||
|
||||
let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in {
|
||||
def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>;
|
||||
def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>;
|
||||
def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>;
|
||||
def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>;
|
||||
def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>;
|
||||
def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>;
|
||||
def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>;
|
||||
def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>;
|
||||
def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>;
|
||||
def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>;
|
||||
def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>;
|
||||
def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>;
|
||||
def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>;
|
||||
def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>;
|
||||
def Q14 : ARM64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias<B14>;
|
||||
def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>;
|
||||
def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>;
|
||||
def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>;
|
||||
def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>;
|
||||
def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>;
|
||||
def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>;
|
||||
def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>;
|
||||
def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>;
|
||||
def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>;
|
||||
def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>;
|
||||
def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>;
|
||||
def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>;
|
||||
def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>;
|
||||
def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>;
|
||||
def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>;
|
||||
def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
|
||||
def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
|
||||
}
|
||||
|
||||
def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> {
|
||||
let Size = 8;
|
||||
}
|
||||
def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> {
|
||||
let Size = 16;
|
||||
}
|
||||
def FPR32 : RegisterClass<"ARM64", [f32, i32], 32, (sequence "S%u", 0, 31)>;
|
||||
def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
|
||||
v1i64],
|
||||
64, (sequence "D%u", 0, 31)>;
|
||||
// We don't (yet) have an f128 legal type, so don't use that here. We
|
||||
// normalize 128-bit vectors to v2f64 for arg passing and such, so use
|
||||
// that here.
|
||||
def FPR128 : RegisterClass<"ARM64",
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
|
||||
128, (sequence "Q%u", 0, 31)>;
|
||||
|
||||
// The lower 16 vector registers. Some instructions can only take registers
|
||||
// in this range.
|
||||
def FPR128_lo : RegisterClass<"ARM64",
|
||||
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
128, (trunc FPR128, 16)>;
|
||||
|
||||
// Pairs, triples, and quads of 64-bit vector registers.
|
||||
def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
|
||||
def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
|
||||
[(rotl FPR64, 0), (rotl FPR64, 1),
|
||||
(rotl FPR64, 2)]>;
|
||||
def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3],
|
||||
[(rotl FPR64, 0), (rotl FPR64, 1),
|
||||
(rotl FPR64, 2), (rotl FPR64, 3)]>;
|
||||
def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> {
|
||||
let Size = 128;
|
||||
}
|
||||
def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> {
|
||||
let Size = 192;
|
||||
}
|
||||
def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> {
|
||||
let Size = 256;
|
||||
}
|
||||
|
||||
// Pairs, triples, and quads of 128-bit vector registers.
|
||||
def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>;
|
||||
def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2],
|
||||
[(rotl FPR128, 0), (rotl FPR128, 1),
|
||||
(rotl FPR128, 2)]>;
|
||||
def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3],
|
||||
[(rotl FPR128, 0), (rotl FPR128, 1),
|
||||
(rotl FPR128, 2), (rotl FPR128, 3)]>;
|
||||
def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> {
|
||||
let Size = 256;
|
||||
}
|
||||
def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> {
|
||||
let Size = 384;
|
||||
}
|
||||
def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> {
|
||||
let Size = 512;
|
||||
}
|
||||
|
||||
|
||||
// Vector operand versions of the FP registers. Alternate name printing and
|
||||
// assembler matching.
|
||||
def VectorRegAsmOperand : AsmOperandClass { let Name = "VectorReg"; }
|
||||
let ParserMatchClass = VectorRegAsmOperand in {
|
||||
def V64 : RegisterOperand<FPR64, "printVRegOperand">;
|
||||
def V128 : RegisterOperand<FPR128, "printVRegOperand">;
|
||||
def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand">;
|
||||
}
|
||||
|
||||
class TypedVecListAsmOperand<int count, int regsize, int lanes, string kind>
|
||||
: AsmOperandClass {
|
||||
let Name = "TypedVectorList" # count # "_" # lanes # kind;
|
||||
|
||||
let PredicateMethod
|
||||
= "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>";
|
||||
let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">";
|
||||
}
|
||||
|
||||
class TypedVecListRegOperand<RegisterClass Reg, int lanes, string kind>
|
||||
: RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '"
|
||||
# kind # "'>">;
|
||||
|
||||
multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> {
|
||||
// With implicit types (probably on instruction instead). E.g. { v0, v1 }
|
||||
def _64AsmOperand : AsmOperandClass {
|
||||
let Name = NAME # "64";
|
||||
let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
|
||||
let RenderMethod = "addVectorList64Operands<" # count # ">";
|
||||
}
|
||||
|
||||
def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand");
|
||||
}
|
||||
|
||||
def _128AsmOperand : AsmOperandClass {
|
||||
let Name = NAME # "128";
|
||||
let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
|
||||
let RenderMethod = "addVectorList128Operands<" # count # ">";
|
||||
}
|
||||
|
||||
def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand");
|
||||
}
|
||||
|
||||
// 64-bit register lists with explicit type.
|
||||
|
||||
// { v0.8b, v1.8b }
|
||||
def _8bAsmOperand : TypedVecListAsmOperand<count, 64, 8, "b">;
|
||||
def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.4h, v1.4h }
|
||||
def _4hAsmOperand : TypedVecListAsmOperand<count, 64, 4, "h">;
|
||||
def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.2s, v1.2s }
|
||||
def _2sAsmOperand : TypedVecListAsmOperand<count, 64, 2, "s">;
|
||||
def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.1d, v1.1d }
|
||||
def _1dAsmOperand : TypedVecListAsmOperand<count, 64, 1, "d">;
|
||||
def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand");
|
||||
}
|
||||
|
||||
// 128-bit register lists with explicit type
|
||||
|
||||
// { v0.16b, v1.16b }
|
||||
def _16bAsmOperand : TypedVecListAsmOperand<count, 128, 16, "b">;
|
||||
def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.8h, v1.8h }
|
||||
def _8hAsmOperand : TypedVecListAsmOperand<count, 128, 8, "h">;
|
||||
def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.4s, v1.4s }
|
||||
def _4sAsmOperand : TypedVecListAsmOperand<count, 128, 4, "s">;
|
||||
def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.2d, v1.2d }
|
||||
def _2dAsmOperand : TypedVecListAsmOperand<count, 128, 2, "d">;
|
||||
def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2dAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.b, v1.b }
|
||||
def _bAsmOperand : TypedVecListAsmOperand<count, 128, 0, "b">;
|
||||
def "b" : TypedVecListRegOperand<Reg128, 0, "b"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.h, v1.h }
|
||||
def _hAsmOperand : TypedVecListAsmOperand<count, 128, 0, "h">;
|
||||
def "h" : TypedVecListRegOperand<Reg128, 0, "h"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.s, v1.s }
|
||||
def _sAsmOperand : TypedVecListAsmOperand<count, 128, 0, "s">;
|
||||
def "s" : TypedVecListRegOperand<Reg128, 0, "s"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand");
|
||||
}
|
||||
|
||||
// { v0.d, v1.d }
|
||||
def _dAsmOperand : TypedVecListAsmOperand<count, 128, 0, "d">;
|
||||
def "d" : TypedVecListRegOperand<Reg128, 0, "d"> {
|
||||
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
defm VecListOne : VectorList<1, FPR64, FPR128>;
|
||||
defm VecListTwo : VectorList<2, DD, QQ>;
|
||||
defm VecListThree : VectorList<3, DDD, QQQ>;
|
||||
defm VecListFour : VectorList<4, DDDD, QQQQ>;
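// Illustrative expansion (a sketch of what the multiclass generates): the
// defms above produce operands such as VecListTwo8b, matching a list like
// { v0.8b, v1.8b }, and VecListFour2d, matching { v0.2d, v1.2d, v2.2d, v3.2d },
// backed by the DD/QQ/... register tuple classes passed in.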
|
||||
|
||||
|
||||
// Register operand versions of the scalar FP registers.
|
||||
def FPR16Op : RegisterOperand<FPR16, "printOperand">;
|
||||
def FPR32Op : RegisterOperand<FPR32, "printOperand">;
|
||||
def FPR64Op : RegisterOperand<FPR64, "printOperand">;
|
||||
def FPR128Op : RegisterOperand<FPR128, "printOperand">;
|
852
lib/Target/ARM64/ARM64SchedCyclone.td
Normal file
@ -0,0 +1,852 @@
|
||||
//=- ARM64SchedCyclone.td - ARM64 Cyclone Scheduling Defs ----*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for ARM64 Cyclone to support
|
||||
// instruction scheduling and other instruction cost heuristics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CycloneModel : SchedMachineModel {
|
||||
let IssueWidth = 6; // 6 micro-ops are dispatched per cycle.
|
||||
let MicroOpBufferSize = 192; // Based on the reorder buffer.
|
||||
let LoadLatency = 4; // Optimistic load latency.
|
||||
let MispredictPenalty = 16; // 14-19 cycles are typical.
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define each kind of processor resource and number available on Cyclone.
|
||||
|
||||
// 4 integer pipes
|
||||
def CyUnitI : ProcResource<4> {
|
||||
let BufferSize = 48;
|
||||
}
|
||||
|
||||
// 2 branch units: I[0..1]
|
||||
def CyUnitB : ProcResource<2> {
|
||||
let Super = CyUnitI;
|
||||
let BufferSize = 24;
|
||||
}
|
||||
|
||||
// 1 indirect-branch unit: I[0]
|
||||
def CyUnitBR : ProcResource<1> {
|
||||
let Super = CyUnitB;
|
||||
}
|
||||
|
||||
// 2 shifter pipes: I[2..3]
|
||||
// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI
|
||||
def CyUnitIS : ProcResource<2> {
|
||||
let Super = CyUnitI;
|
||||
let BufferSize = 24;
|
||||
}
|
||||
|
||||
// 1 mul pipe: I[0]
|
||||
def CyUnitIM : ProcResource<1> {
|
||||
let Super = CyUnitBR;
|
||||
let BufferSize = 32;
|
||||
}
|
||||
|
||||
// 1 div pipe: I[1]
|
||||
def CyUnitID : ProcResource<1> {
|
||||
let Super = CyUnitB;
|
||||
let BufferSize = 16;
|
||||
}
|
||||
|
||||
// 1 integer division unit. This is driven by the ID pipe, but only
|
||||
// consumes the pipe for one cycle at issue and another cycle at writeback.
|
||||
def CyUnitIntDiv : ProcResource<1>;
|
||||
|
||||
// 2 ld/st pipes.
|
||||
def CyUnitLS : ProcResource<2> {
|
||||
let BufferSize = 28;
|
||||
}
|
||||
|
||||
// 3 fp/vector pipes.
|
||||
def CyUnitV : ProcResource<3> {
|
||||
let BufferSize = 48;
|
||||
}
|
||||
// 2 fp/vector arithmetic and multiply pipes: V[0-1]
|
||||
def CyUnitVM : ProcResource<2> {
|
||||
let Super = CyUnitV;
|
||||
let BufferSize = 32;
|
||||
}
|
||||
// 1 fp/vector division/sqrt pipe: V[2]
|
||||
def CyUnitVD : ProcResource<1> {
|
||||
let Super = CyUnitV;
|
||||
let BufferSize = 16;
|
||||
}
|
||||
// 1 fp compare pipe: V[0]
|
||||
def CyUnitVC : ProcResource<1> {
|
||||
let Super = CyUnitVM;
|
||||
let BufferSize = 16;
|
||||
}
|
||||
|
||||
// 2 fp division/square-root units. These are driven by the VD pipe,
|
||||
// but only consume the pipe for one cycle at issue and a cycle at writeback.
|
||||
def CyUnitFloatDiv : ProcResource<2>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Define scheduler read/write resources and latency on Cyclone.
|
||||
// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1.
|
||||
|
||||
let SchedModel = CycloneModel in {
|
||||
|
||||
//---
|
||||
// 7.8.1. Moves
|
||||
//---
|
||||
|
||||
// A single nop micro-op (uX).
|
||||
def WriteX : SchedWriteRes<[]> { let Latency = 0; }
|
||||
|
||||
// Move zero is a register rename (to machine register zero).
|
||||
// The move is replaced by a single nop micro-op.
|
||||
// MOVZ Rd, #0
|
||||
// AND Rd, Rzr, #imm
|
||||
def WriteZPred : SchedPredicate<[{TII->isGPRZero(MI)}]>;
|
||||
def WriteImmZ : SchedWriteVariant<[
|
||||
SchedVar<WriteZPred, [WriteX]>,
|
||||
SchedVar<NoSchedPred, [WriteImm]>]>;
|
||||
def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>;
|
||||
|
||||
// Move GPR is a register rename and single nop micro-op.
|
||||
// ORR Xd, XZR, Xm
|
||||
// ADD Xd, Xn, #0
|
||||
def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(MI)}]>;
|
||||
def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(MI)}]>;
|
||||
def WriteMov : SchedWriteVariant<[
|
||||
SchedVar<WriteIMovPred, [WriteX]>,
|
||||
SchedVar<WriteVMovPred, [WriteX]>,
|
||||
SchedVar<NoSchedPred, [WriteI]>]>;
|
||||
def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>;
|
||||
|
||||
// Move non-zero immediate is an integer ALU op.
|
||||
// MOVN,MOVZ,MOVK
|
||||
def : WriteRes<WriteImm, [CyUnitI]>;
|
||||
|
||||
//---
|
||||
// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional,
|
||||
// Shifts and Bitfield Operations
|
||||
//---
|
||||
|
||||
// ADR,ADRP
|
||||
// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri
|
||||
// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr
|
||||
// ADC(S),SBC(S)
|
||||
// Aliases: CMN, CMP, TST
|
||||
//
|
||||
// Conditional operations.
|
||||
// CCMNi,CCMPi,CCMNr,CCMPr,
|
||||
// CSEL,CSINC,CSINV,CSNEG
|
||||
//
|
||||
// Bit counting and reversal operations.
|
||||
// CLS,CLZ,RBIT,REV,REV16,REV32
|
||||
def : WriteRes<WriteI, [CyUnitI]>;
|
||||
|
||||
// ADD with shifted register operand is a single micro-op that
|
||||
// consumes a shift pipeline for two cycles.
|
||||
// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs
|
||||
// EXAMPLE: ADDrs Xn, Xm LSL #imm
|
||||
def : WriteRes<WriteISReg, [CyUnitIS]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
|
||||
// ADD with extended register operand is the same as shifted reg operand.
|
||||
// ADD(S)re,SUB(S)re
|
||||
// EXAMPLE: ADDXre Xn, Xm, UXTB #1
|
||||
def : WriteRes<WriteIEReg, [CyUnitIS]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
|
||||
// Variable shift and bitfield operations.
|
||||
// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM
|
||||
def : WriteRes<WriteIS, [CyUnitIS]>;
|
||||
|
||||
// EXTR shifts a pair of registers and requires two micro-ops.
|
||||
// The second micro-op is delayed, as modeled by ReadExtrHi.
|
||||
// EXTR Xn, Xm, #imm
|
||||
def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> {
|
||||
let Latency = 2;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
|
||||
// EXTR's first register read is delayed by one cycle, effectively
|
||||
// shortening its writer's latency.
|
||||
// EXTR Xn, Xm, #imm
|
||||
def : ReadAdvance<ReadExtrHi, 1>;
|
||||
|
||||
//---
|
||||
// 7.8.6. Multiplies
|
||||
//---
|
||||
|
||||
// MUL/MNEG are aliases for MADD/MSUB.
|
||||
// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL
|
||||
def : WriteRes<WriteIM32, [CyUnitIM]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
// MADDX,MSUBX,SMULH,UMULH
|
||||
def : WriteRes<WriteIM64, [CyUnitIM]> {
|
||||
let Latency = 5;
|
||||
}
|
||||
|
||||
//---
|
||||
// 7.8.7. Divide
|
||||
//---
|
||||
|
||||
// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient.
|
||||
// The ID pipe is consumed for 2 cycles: issue and writeback.
|
||||
// SDIVW,UDIVW
|
||||
def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> {
|
||||
let Latency = 10;
|
||||
let ResourceCycles = [2, 10];
|
||||
}
|
||||
// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient.
|
||||
// The ID pipe is consumed for 2 cycles: issue and writeback.
|
||||
// SDIVX,UDIVX
|
||||
def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> {
|
||||
let Latency = 13;
|
||||
let ResourceCycles = [2, 13];
|
||||
}
|
||||
|
||||
//---
|
||||
// 7.8.8,7.8.10. Load/Store, single element
|
||||
//---
|
||||
|
||||
// Integer loads take 4 cycles and use one LS unit for one cycle.
|
||||
def : WriteRes<WriteLD, [CyUnitLS]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
|
||||
// Store-load forwarding is 4 cycles.
|
||||
//
|
||||
// Note: The store-exclusive sequence incorporates this
|
||||
// latency. However, general heuristics should not model the
|
||||
// dependence between a store and subsequent may-alias load because
|
||||
// hardware speculation works.
|
||||
def : WriteRes<WriteST, [CyUnitLS]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
|
||||
// Load from base address plus an optionally scaled register offset.
|
||||
// Rt latency is the WriteIS + WriteLD latency.
|
||||
// EXAMPLE: LDR Xn, Xm [, lsl 3]
|
||||
def CyWriteLDIdx : SchedWriteVariant<[
|
||||
SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register.
|
||||
SchedVar<NoSchedPred, [WriteLD]>]>; // Load from register offset.
|
||||
def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map ARM64->Cyclone type.
|
||||
|
||||
// EXAMPLE: STR Xn, Xm [, lsl 3]
|
||||
def CyWriteSTIdx : SchedWriteVariant<[
|
||||
SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register.
|
||||
SchedVar<NoSchedPred, [WriteST]>]>; // Store to register offset.
|
||||
def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map ARM64->Cyclone type.
|
||||
|
||||
// Read the (unshifted) base register Xn in the second micro-op one cycle later.
|
||||
// EXAMPLE: LDR Xn, Xm [, lsl 3]
|
||||
def ReadBaseRS : SchedReadAdvance<1>;
|
||||
def CyReadAdrBase : SchedReadVariant<[
|
||||
SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
|
||||
SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
|
||||
def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map ARM64->Cyclone type.
|
||||
|
||||
//---
|
||||
// 7.8.9,7.8.11. Load/Store, paired
|
||||
//---
|
||||
|
||||
// Address pre/post increment is a simple ALU op with one cycle latency.
|
||||
def : WriteRes<WriteAdr, [CyUnitI]>;
|
||||
|
||||
// LDP high register write is fused with the load, but a nop micro-op remains.
|
||||
def : WriteRes<WriteLDHi, []> {
|
||||
let Latency = 4;
|
||||
}
|
||||
|
||||
// STP is a vector op and store, except for QQ, which is just two stores.
|
||||
def : SchedAlias<WriteSTP, WriteVSTShuffle>;
|
||||
def : InstRW<[WriteST, WriteST], (instrs STPQi)>;
|
||||
|
||||
//---
|
||||
// 7.8.13. Branches
|
||||
//---
|
||||
|
||||
// Branches take a single micro-op.
|
||||
// The misprediction penalty is defined as a SchedMachineModel property.
|
||||
def : WriteRes<WriteBr, [CyUnitB]> {let Latency = 0;}
|
||||
def : WriteRes<WriteBrReg, [CyUnitBR]> {let Latency = 0;}
|
||||
|
||||
//---
|
||||
// 7.8.14. Never-issued Instructions, Barrier and Hint Operations
|
||||
//---
|
||||
|
||||
// NOP,SEV,SEVL,WFE,WFI,YIELD
|
||||
def : WriteRes<WriteHint, []> {let Latency = 0;}
|
||||
// ISB
|
||||
def : InstRW<[WriteI], (instrs ISB)>;
|
||||
// CLREX,DMB,DSB
|
||||
def : WriteRes<WriteBarrier, [CyUnitLS]>;
|
||||
|
||||
// System instructions get an invalid latency because the latency of
|
||||
// other operations across them is meaningless.
|
||||
def : WriteRes<WriteSys, []> {let Latency = -1;}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 7.9 Vector Unit Instructions
|
||||
|
||||
// Simple vector operations take 2 cycles.
|
||||
def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
|
||||
|
||||
// Define some longer latency vector op types for Cyclone.
|
||||
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
|
||||
def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;}
|
||||
def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;}
|
||||
def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;}
|
||||
|
||||
// Simple floating-point operations take 2 cycles.
|
||||
def : WriteRes<WriteF, [CyUnitV]> {let Latency = 2;}
|
||||
|
||||
//---
|
||||
// 7.9.1 Vector Moves
|
||||
//---
|
||||
|
||||
// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently
|
||||
// generates expensive int-float conversion instead:
|
||||
// FMOVDi Dd, #0.0
|
||||
// FMOVv2f64ns Vd.2d, #0.0
|
||||
|
||||
// FMOVSi,FMOVDi
|
||||
def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
|
||||
|
||||
// MOVI,MVNI are WriteV
|
||||
// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV
|
||||
|
||||
// Move FPR is a register rename and single nop micro-op.
|
||||
// ORR.16b Vd,Vn,Vn
|
||||
// COPY is handled above in the WriteMov Variant.
|
||||
def WriteVMov : SchedWriteVariant<[
|
||||
SchedVar<WriteVMovPred, [WriteX]>,
|
||||
SchedVar<NoSchedPred, [WriteV]>]>;
|
||||
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
|
||||
|
||||
// FMOVSr,FMOVDr are WriteF.
|
||||
|
||||
// MOV V,V is a WriteV.
|
||||
|
||||
// CPY D,V[x] is a WriteV
|
||||
|
||||
// INS V[x],V[y] is a WriteV.
|
||||
|
||||
// FMOVWSr,FMOVXDr,FMOVXDHighr
|
||||
def : SchedAlias<WriteFCopy, WriteVLD>;
|
||||
|
||||
// FMOVSWr,FMOVDXr
|
||||
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
|
||||
|
||||
// INS V[x],R
|
||||
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
|
||||
|
||||
// SMOV,UMOV R,V[x]
|
||||
def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>;
|
||||
def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>;
|
||||
|
||||
// DUP V,R
|
||||
def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>;
|
||||
|
||||
// DUP V,V[x] is a WriteV.
|
||||
|
||||
//---
|
||||
// 7.9.2 Integer Arithmetic, Logical, and Comparisons
|
||||
//---
|
||||
|
||||
// BIC,ORR V,#imm are WriteV
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "ABSv")>;
|
||||
|
||||
// MVN,NEG,NOT are WriteV
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>;
|
||||
|
||||
// ADDP is a WriteV.
|
||||
def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
|
||||
def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>;
|
||||
|
||||
def : InstRW<[CyWriteV3],
|
||||
(instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>;
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>;
|
||||
|
||||
// ADD,SUB are WriteV
|
||||
|
||||
// Forward declare.
|
||||
def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
|
||||
|
||||
// Add/Diff and accumulate uses the vector multiply unit.
|
||||
def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
|
||||
def CyReadVAccum : SchedReadAdvance<1,
|
||||
[CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>;
|
||||
|
||||
def : InstRW<[CyWriteVAccum, CyReadVAccum],
|
||||
(instregex "SADALP","UADALP")>;
|
||||
|
||||
def : InstRW<[CyWriteVAccum, CyReadVAccum],
|
||||
(instregex "SABAv","UABAv","SABALv","UABALv")>;
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>;
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>;
|
||||
|
||||
def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>;
|
||||
|
||||
// WriteV includes:
|
||||
// AND,BIC,CMTST,EOR,ORN,ORR
|
||||
// ADDP
|
||||
// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD
|
||||
// SADDL,SSUBL,UADDL,USUBL
|
||||
// SADDW,SSUBW,UADDW,USUBW
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv",
|
||||
"CMLEv","CMLTv",
|
||||
"CMHIv","CMHSv")>;
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv",
|
||||
"SMAXPv","SMINPv","UMAXPv","UMINPv")>;
|
||||
|
||||
def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv",
|
||||
"SABDLv","UABDLv")>;
|
||||
|
||||
//---
|
||||
// 7.9.3 Floating Point Arithmetic and Comparisons
|
||||
//---
|
||||
|
||||
// FABS,FNEG are WriteF
|
||||
|
||||
def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>;
|
||||
def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>;
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i",
|
||||
"FMINPv2i","FMINNMPv2i")>;
|
||||
|
||||
def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>;
|
||||
|
||||
def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32,
|
||||
FSUBSrr,FSUBv2f32,FSUBv4f32,
|
||||
FADDPv2f32,FADDPv4f32,
|
||||
FABD32,FABDv2f32,FABDv4f32)>;
|
||||
def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64,
|
||||
FSUBDrr,FSUBv2f64,
|
||||
FADDPv2f64,
|
||||
FABD64,FABDv2f64)>;
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>;
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT",
|
||||
"FMAXS","FMAXD","FMAXv",
|
||||
"FMINS","FMIND","FMINv",
|
||||
"FMAXNMS","FMAXNMD","FMAXNMv",
|
||||
"FMINNMS","FMINNMD","FMINNMv",
|
||||
"FMAXPv2f","FMAXPv4f",
|
||||
"FMINPv2f","FMINPv4f",
|
||||
"FMAXNMPv2f","FMAXNMPv4f",
|
||||
"FMINNMPv2f","FMINNMPv4f")>;
|
||||
|
||||
// FCMP,FCMPE,FCCMP,FCCMPE
|
||||
def : WriteRes<WriteFCmp, [CyUnitVC]> {let Latency = 4;}
|
||||
|
||||
// FCSEL is a WriteF.
|
||||
|
||||
//---
|
||||
// 7.9.4 Shifts and Bitfield Operations
|
||||
//---
|
||||
|
||||
// SHL is a WriteV
|
||||
|
||||
def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
|
||||
def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>;
|
||||
|
||||
def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
|
||||
def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>;
|
||||
|
||||
// Shift and accumulate uses the vector multiply unit.
|
||||
def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
|
||||
def CyReadVShiftAcc : SchedReadAdvance<1,
|
||||
[CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>;
|
||||
def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc],
|
||||
(instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
|
||||
|
||||
// SSHL,USHL are WriteV.
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>;
|
||||
|
||||
// SQSHL,SQSHLU,UQSHL are WriteV.
|
||||
|
||||
def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
|
||||
|
||||
// WriteV includes:
|
||||
// SHLL,SSHLL,USHLL
|
||||
// SLI,SRI
|
||||
// BIF,BIT,BSL
|
||||
// EXT
|
||||
// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
|
||||
// XTN2
|
||||
|
||||
def : InstRW<[CyWriteV4],
|
||||
(instregex "RSHRNv","SHRNv",
|
||||
"SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv",
|
||||
"UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
|
||||
|
||||
//---
|
||||
// 7.9.5 Multiplication
|
||||
//---
|
||||
|
||||
def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;}
|
||||
def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv",
|
||||
"SQDMULLv","SQDMULHv","SQRDMULHv")>;
|
||||
|
||||
// FMUL,FMULX,FNMUL default to WriteFMul.
|
||||
def : WriteRes<WriteFMul, [CyUnitVM]> { let Latency = 4;}
|
||||
|
||||
def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;}
|
||||
def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed,
|
||||
FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>;
|
||||
|
||||
def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>;
|
||||
def : InstRW<[CyWriteVMul, CyReadVMulAcc],
|
||||
(instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL",
|
||||
"SQDMLAL","SQDMLSL")>;
|
||||
|
||||
def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;}
|
||||
def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;}
|
||||
def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>;
|
||||
def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>;
|
||||
|
||||
def : InstRW<[CyWriteSMul, CyReadSMul],
|
||||
(instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr,
|
||||
FMLAv2f32,FMLAv4f32,
|
||||
FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>;
|
||||
def : InstRW<[CyWriteDMul, CyReadDMul],
|
||||
(instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr,
|
||||
FMLAv2f64,FMLAv2i64_indexed,
|
||||
FMLSv2f64,FMLSv2i64_indexed)>;
|
||||
|
||||
def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; }
|
||||
def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>;
|
||||
|
||||
//---
|
||||
// 7.9.6 Divide and Square Root
|
||||
//---
|
||||
|
||||
// FDIV,FSQRT
|
||||
// TODO: Add 64-bit variant with 19 cycle latency.
|
||||
// TODO: Specialize FSQRT for longer latency.
|
||||
def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> {
|
||||
let Latency = 17;
|
||||
let ResourceCycles = [2, 17];
|
||||
}
|
||||
|
||||
def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>;
|
||||
|
||||
def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; }
|
||||
def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>;
|
||||
|
||||
def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; }
|
||||
def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; }
|
||||
def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>;
|
||||
def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
|
||||
|
||||
//---
|
||||
// 7.9.7 Integer-FP Conversions
|
||||
//---
|
||||
|
||||
// FCVT lengthen f16/s32
|
||||
def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
|
||||
|
||||
// FCVT,FCVTN,FCVTXN
|
||||
// SCVTF,UCVTF V,V
|
||||
// FRINT(AIMNPXZ) V,V
|
||||
def : WriteRes<WriteFCvt, [CyUnitV]> {let Latency = 4;}
|
||||
|
||||
// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles.
|
||||
def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>;
|
||||
def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>;
|
||||
|
||||
// FCVT Rd, S/D = V6+LD4: 10 cycles
|
||||
def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>;
|
||||
def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>;
|
||||
|
||||
// FCVTL is a WriteV
|
||||
|
||||
//---
|
||||
// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup
|
||||
//---
|
||||
|
||||
def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;}
|
||||
def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr,
|
||||
AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr,
|
||||
SHA1SU0rrr)>;
|
||||
|
||||
def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;}
|
||||
def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>;
|
||||
|
||||
def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;}
|
||||
def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr,
|
||||
SHA256Hrrr,SHA256H2rrr)>;
|
||||
|
||||
// TRN,UZP,ZIP are WriteV.
|
||||
|
||||
// TBL,TBX are WriteV.
|
||||
|
||||
//---
|
||||
// 7.9.11-7.9.14 Load/Store, single element and paired
|
||||
//---
|
||||
|
||||
// Loading into the vector unit takes 5 cycles vs 4 for integer loads.
|
||||
def : WriteRes<WriteVLD, [CyUnitLS]> {
|
||||
let Latency = 5;
|
||||
}
|
||||
|
||||
// Store-load forwarding is 4 cycles.
|
||||
def : WriteRes<WriteVST, [CyUnitLS]> {
|
||||
let Latency = 4;
|
||||
}
|
||||
|
||||
// WriteVLDPair/VSTPair sequences are expanded by the target description.
|
||||
|
||||
//---
|
||||
// 7.9.15 Load, element operations
|
||||
//---
|
||||
|
||||
// Only the first WriteVLD and the WriteAdr for writeback match def operands.
|
||||
// Subsequent WriteVLDs consume resources. Since all loaded values have the
|
||||
// same latency, this is acceptable.
|
||||
|
||||
// Vd is read 5 cycles after issuing the vector load.
|
||||
def : ReadAdvance<ReadVLD, 5>;
|
||||
|
||||
def : InstRW<[WriteVLD],
|
||||
(instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLD, WriteAdr],
|
||||
(instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
|
||||
|
||||
// Register writes from the load's high half are fused micro-ops.
|
||||
def : InstRW<[WriteVLD],
|
||||
(instregex "LD1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteVLD, WriteAdr],
|
||||
(instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[WriteVLD, WriteVLD],
|
||||
(instregex "LD1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
|
||||
(instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLD, WriteVLD],
|
||||
(instregex "LD1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
|
||||
(instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[WriteVLD, WriteVLD, WriteVLD],
|
||||
(instregex "LD1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD],
|
||||
(instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLD, WriteVLD],
|
||||
(instregex "LD1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
|
||||
(instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD],
|
||||
(instregex "LD1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD],
|
||||
(instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD],
|
||||
(instregex "LD1i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],
|
||||
(instregex "LD1i(8|16|32)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle],
|
||||
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr],
|
||||
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
(instregex "LD2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
(instregex "LD2Twov(8b|4h|2s)_POST$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
|
||||
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
(instregex "LD2i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
(instregex "LD2i(8|16|32)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
|
||||
(instregex "LD2i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
|
||||
(instregex "LD2i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
|
||||
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
(instregex "LD3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
(instregex "LD3Threev(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
|
||||
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
|
||||
(instregex "LD3i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
|
||||
(instregex "LD3i(8|16|32)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
|
||||
(instregex "LD3i64$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
(instregex "LD3i64_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
|
||||
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
|
||||
(instrs LD3Rv1d,LD3Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
(instregex "LD4Fourv(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
(instregex "LD4Fourv(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
|
||||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
(instregex "LD4Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
|
||||
WriteVLDPairShuffle, WriteVLDPairShuffle],
|
||||
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
|
||||
(instregex "LD4i(8|16|32)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
|
||||
(instregex "LD4i(8|16|32)_POST")>;
|
||||
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
|
||||
(instrs LD4i64)>;
|
||||
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
|
||||
(instrs LD4i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
|
||||
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
|
||||
(instrs LD4Rv1d,LD4Rv2d)>;
|
||||
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
|
||||
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
|
||||
|
||||
//---
|
||||
// 7.9.16 Store, element operations
|
||||
//---
|
||||
|
||||
// Only the WriteAdr for writeback matches a def operand.
|
||||
// Subsequent WriteVSTs only consume resources.
|
||||
|
||||
def : InstRW<[WriteVST],
|
||||
(instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVST],
|
||||
(instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle],
|
||||
(instregex "ST1Twov(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle],
|
||||
(instregex "ST1Twov(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[WriteVST, WriteVST],
|
||||
(instregex "ST1Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVST, WriteVST],
|
||||
(instregex "ST1Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle, WriteVST],
|
||||
(instregex "ST1Threev(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST],
|
||||
(instregex "ST1Threev(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[WriteVST, WriteVST, WriteVST],
|
||||
(instregex "ST1Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST],
|
||||
(instregex "ST1Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST1Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST1Fourv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST],
|
||||
(instregex "ST1Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST],
|
||||
(instregex "ST1Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle],
|
||||
(instregex "ST2Twov(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle],
|
||||
(instregex "ST2Twov(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST2Twov(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST2Twov(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>;
|
||||
def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST3Threev(8b|4h|2s)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST3Threev(8b|4h|2s)_POST")>;
|
||||
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST3Threev(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
|
||||
(instregex "ST3Threev(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>;
|
||||
|
||||
def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>;
|
||||
def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>;
|
||||
|
||||
def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle],
|
||||
(instregex "ST4Fourv(8b|4h|2s|1d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle],
|
||||
(instregex "ST4Fourv(8b|4h|2s|1d)_POST")>;
|
||||
def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle,
|
||||
WriteVSTPairShuffle, WriteVSTPairShuffle],
|
||||
(instregex "ST4Fourv(16b|8h|4s|2d)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle,
|
||||
WriteVSTPairShuffle, WriteVSTPairShuffle],
|
||||
(instregex "ST4Fourv(16b|8h|4s|2d)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>;
|
||||
def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;
|
||||
|
||||
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
|
||||
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
|
||||
|
||||
} // SchedModel = CycloneModel
|
lib/Target/ARM64/ARM64Schedule.td (new file, 92 lines)
@ -0,0 +1,92 @@
|
||||
//===-- ARM64Schedule.td - ARM64 Scheduling Definitions ----*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Define TII for use in SchedVariant Predicates.
|
||||
// const MachineInstr *MI and const TargetSchedModel *SchedModel
|
||||
// are defined by default.
|
||||
def : PredicateProlog<[{
|
||||
const ARM64InstrInfo *TII =
|
||||
static_cast<const ARM64InstrInfo*>(SchedModel->getInstrInfo());
|
||||
(void)TII;
|
||||
}]>;
|
||||
|
||||
// ARM64 Scheduler Definitions
|
||||
|
||||
def WriteImm : SchedWrite; // MOVN, MOVZ
|
||||
// TODO: Provide variants for MOV32/64imm Pseudos that dynamically
|
||||
// select the correct sequence of WriteImms.
|
||||
|
||||
def WriteI : SchedWrite; // ALU
|
||||
def WriteISReg : SchedWrite; // ALU of Shifted-Reg
|
||||
def WriteIEReg : SchedWrite; // ALU of Extended-Reg
|
||||
def WriteExtr : SchedWrite; // EXTR shifts a reg pair
|
||||
def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
|
||||
def WriteIS : SchedWrite; // Shift/Scale
|
||||
def WriteID32 : SchedWrite; // 32-bit Divide
|
||||
def WriteID64 : SchedWrite; // 64-bit Divide
|
||||
def WriteIM32 : SchedWrite; // 32-bit Multiply
|
||||
def WriteIM64 : SchedWrite; // 64-bit Multiply
|
||||
def WriteBr : SchedWrite; // Branch
|
||||
def WriteBrReg : SchedWrite; // Indirect Branch
|
||||
|
||||
def WriteLD : SchedWrite; // Load from base addr plus immediate offset
|
||||
def WriteST : SchedWrite; // Store to base addr plus immediate offset
|
||||
def WriteSTP : SchedWrite; // Store a register pair.
|
||||
def WriteAdr : SchedWrite; // Address pre/post increment.
|
||||
|
||||
def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
|
||||
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
|
||||
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
|
||||
|
||||
// ScaledIdxPred is true if a WriteLDIdx operand will be
|
||||
// scaled. Subtargets can use this to dynamically select resources and
|
||||
// latency for WriteLDIdx and ReadAdrBase.
|
||||
def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>;
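// A minimal sketch of how a subtarget might use this predicate (the write
// names, resource, and latencies below are illustrative, not defined here):
//   def MyLDIdxScaled   : SchedWriteRes<[MyUnitLS]> { let Latency = 5; }
//   def MyLDIdxUnscaled : SchedWriteRes<[MyUnitLS]> { let Latency = 4; }
//   def MyWriteLDIdx    : SchedWriteVariant<[
//     SchedVar<ScaledIdxPred, [MyLDIdxScaled]>,
//     SchedVar<NoSchedPred,   [MyLDIdxUnscaled]>]>;
//   def : SchedAlias<WriteLDIdx, MyWriteLDIdx>;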
|
||||
|
||||
// Serialized two-level address load.
|
||||
// EXAMPLE: LOADGot
|
||||
def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
|
||||
|
||||
// Serialized two-level address lookup.
|
||||
// EXAMPLE: MOVaddr...
|
||||
def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>;
|
||||
|
||||
// The second register of a load-pair.
|
||||
// LDP,LDPSW,LDNP,LDXP,LDAXP
|
||||
def WriteLDHi : SchedWrite;
|
||||
|
||||
// Store-exclusive is a store followed by a dependent load.
|
||||
def WriteSTX : WriteSequence<[WriteST, WriteLD]>;
|
||||
|
||||
def WriteSys : SchedWrite; // Long, variable latency system ops.
|
||||
def WriteBarrier : SchedWrite; // Memory barrier.
|
||||
def WriteHint : SchedWrite; // Hint instruction.
|
||||
|
||||
def WriteF : SchedWrite; // General floating-point ops.
|
||||
def WriteFCmp : SchedWrite; // Floating-point compare.
|
||||
def WriteFCvt : SchedWrite; // Float conversion.
|
||||
def WriteFCopy : SchedWrite; // Float-int register copy.
|
||||
def WriteFImm : SchedWrite; // Floating-point immediate.
|
||||
def WriteFMul : SchedWrite; // Floating-point multiply.
|
||||
def WriteFDiv : SchedWrite; // Floating-point division.
|
||||
|
||||
def WriteV : SchedWrite; // Vector ops.
|
||||
def WriteVLD : SchedWrite; // Vector loads.
|
||||
def WriteVST : SchedWrite; // Vector stores.
|
||||
|
||||
// Read the unwritten lanes of the VLD's destination registers.
|
||||
def ReadVLD : SchedRead;
|
||||
|
||||
// Sequential vector load and shuffle.
|
||||
def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
|
||||
def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
|
||||
|
||||
// Store a shuffled vector.
|
||||
def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
|
||||
def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
|
lib/Target/ARM64/ARM64SelectionDAGInfo.cpp (new file, 57 lines)
@ -0,0 +1,57 @@
|
||||
//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM64SelectionDAGInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-selectiondag-info"
|
||||
#include "ARM64TargetMachine.h"
|
||||
using namespace llvm;
|
||||
|
||||
ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM)
|
||||
: TargetSelectionDAGInfo(TM),
|
||||
Subtarget(&TM.getSubtarget<ARM64Subtarget>()) {}
|
||||
|
||||
ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {}
|
||||
|
||||
SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset(
|
||||
SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
|
||||
SDValue Size, unsigned Align, bool isVolatile,
|
||||
MachinePointerInfo DstPtrInfo) const {
|
||||
// Check to see if there is a specialized entry-point for memory zeroing.
|
||||
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
|
||||
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
|
||||
const char *bzeroEntry =
|
||||
(V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0;
|
||||
// For small size (< 256), it is not beneficial to use bzero
|
||||
// instead of memset.
|
||||
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
|
||||
const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>(
|
||||
DAG.getTarget().getTargetLowering());
|
||||
|
||||
EVT IntPtr = TLI.getPointerTy();
|
||||
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
|
||||
TargetLowering::ArgListTy Args;
|
||||
TargetLowering::ArgListEntry Entry;
|
||||
Entry.Node = Dst;
|
||||
Entry.Ty = IntPtrTy;
|
||||
Args.push_back(Entry);
|
||||
Entry.Node = Size;
|
||||
Args.push_back(Entry);
|
||||
TargetLowering::CallLoweringInfo CLI(
|
||||
Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
|
||||
0, CallingConv::C, /*isTailCall=*/false,
|
||||
/*doesNotRet=*/false, /*isReturnValueUsed=*/false,
|
||||
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
|
||||
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
|
||||
return CallResult.second;
|
||||
}
|
||||
return SDValue();
|
||||
}
|
lib/Target/ARM64/ARM64SelectionDAGInfo.h (new file, 38 lines)
@ -0,0 +1,38 @@
|
||||
//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the ARM64 subclass for TargetSelectionDAGInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64SELECTIONDAGINFO_H
|
||||
#define ARM64SELECTIONDAGINFO_H
|
||||
|
||||
#include "llvm/Target/TargetSelectionDAGInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo {
|
||||
/// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
|
||||
/// make the right decision when generating code for different targets.
|
||||
const ARM64Subtarget *Subtarget;
|
||||
|
||||
public:
|
||||
explicit ARM64SelectionDAGInfo(const TargetMachine &TM);
|
||||
~ARM64SelectionDAGInfo();
|
||||
|
||||
virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
|
||||
SDValue Chain, SDValue Dst,
|
||||
SDValue Src, SDValue Size,
|
||||
unsigned Align, bool isVolatile,
|
||||
MachinePointerInfo DstPtrInfo) const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
lib/Target/ARM64/ARM64StorePairSuppress.cpp (new file, 169 lines)
@ -0,0 +1,169 @@
|
||||
//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass identifies floating point stores that should not be combined into
|
||||
// store pairs. Later we may do the same for floating point loads.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64-stp-suppress"
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineTraceMetrics.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetSchedule.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class ARM64StorePairSuppress : public MachineFunctionPass {
|
||||
const ARM64InstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
const MachineRegisterInfo *MRI;
|
||||
MachineFunction *MF;
|
||||
TargetSchedModel SchedModel;
|
||||
MachineTraceMetrics *Traces;
|
||||
MachineTraceMetrics::Ensemble *MinInstr;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
ARM64StorePairSuppress() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "ARM64 Store Pair Suppression";
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F);
|
||||
|
||||
private:
|
||||
bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
|
||||
|
||||
bool isNarrowFPStore(const MachineInstr *MI);
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<MachineTraceMetrics>();
|
||||
AU.addPreserved<MachineTraceMetrics>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
char ARM64StorePairSuppress::ID = 0;
|
||||
} // anonymous
|
||||
|
||||
FunctionPass *llvm::createARM64StorePairSuppressPass() {
|
||||
return new ARM64StorePairSuppress();
|
||||
}
|
||||
|
||||
/// Return true if an STP can be added to this block without increasing the
|
||||
/// critical resource height. STP is good to form in Ld/St limited blocks and
|
||||
/// bad to form in floating-point limited blocks. This is true independent of the
|
||||
/// critical path. If the critical path is longer than the resource height, the
|
||||
/// extra vector ops can limit physreg renaming. Otherwise, it could simply
|
||||
/// oversaturate the vector units.
|
||||
bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
|
||||
if (!MinInstr)
|
||||
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
|
||||
|
||||
MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
|
||||
unsigned ResLength = BBTrace.getResourceLength();
|
||||
|
||||
// Get the machine model's scheduling class for STPDi.
|
||||
// Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
|
||||
unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass();
|
||||
const MCSchedClassDesc *SCDesc =
|
||||
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
|
||||
|
||||
// If a subtarget does not define resources for STPDi, bail here.
|
||||
if (SCDesc->isValid() && !SCDesc->isVariant()) {
|
||||
unsigned ResLenWithSTP = BBTrace.getResourceLength(
|
||||
ArrayRef<const MachineBasicBlock *>(), SCDesc);
|
||||
if (ResLenWithSTP > ResLength) {
|
||||
DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
|
||||
<< " resources " << ResLength << " -> " << ResLenWithSTP
|
||||
<< "\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Return true if this is a floating-point store smaller than the V reg. On
|
||||
/// Cyclone, these require a vector shuffle before storing a pair.
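/// (For instance, two adjacent STRSui stores that the load/store optimizer
/// would otherwise merge into a single STPSi.)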
|
||||
/// Ideally we would call getMatchingPairOpcode() and have the machine model
|
||||
/// tell us if it's profitable with no cpu knowledge here.
|
||||
///
|
||||
/// FIXME: We plan to develop a decent Target abstraction for simple loads and
|
||||
/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer.
|
||||
bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr *MI) {
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case ARM64::STRSui:
|
||||
case ARM64::STRDui:
|
||||
case ARM64::STURSi:
|
||||
case ARM64::STURDi:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
|
||||
MF = &mf;
|
||||
TII = static_cast<const ARM64InstrInfo *>(MF->getTarget().getInstrInfo());
|
||||
TRI = MF->getTarget().getRegisterInfo();
|
||||
MRI = &MF->getRegInfo();
|
||||
const TargetSubtargetInfo &ST =
|
||||
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
|
||||
SchedModel.init(*ST.getSchedModel(), &ST, TII);
|
||||
|
||||
Traces = &getAnalysis<MachineTraceMetrics>();
|
||||
MinInstr = 0;
|
||||
|
||||
DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
|
||||
|
||||
if (!SchedModel.hasInstrSchedModel()) {
|
||||
DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for a sequence of stores to the same base address. We don't need to
|
||||
// precisely determine whether a store pair can be formed. But we do want to
|
||||
// filter out most situations where we can't form store pairs to avoid
|
||||
// computing trace metrics in those cases.
|
||||
for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); BI != BE;
|
||||
++BI) {
|
||||
bool SuppressSTP = false;
|
||||
unsigned PrevBaseReg = 0;
|
||||
for (MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E;
|
||||
++I) {
|
||||
if (!isNarrowFPStore(I))
|
||||
continue;
|
||||
unsigned BaseReg;
|
||||
unsigned Offset;
|
||||
if (TII->getLdStBaseRegImmOfs(I, BaseReg, Offset, TRI)) {
|
||||
if (PrevBaseReg == BaseReg) {
|
||||
// If this block can take STPs, skip ahead to the next block.
|
||||
if (!SuppressSTP && shouldAddSTPToBlock(I->getParent()))
|
||||
break;
|
||||
// Otherwise, continue unpairing the stores in this block.
|
||||
DEBUG(dbgs() << "Unpairing store " << *I << "\n");
|
||||
SuppressSTP = true;
|
||||
TII->suppressLdStPair(I);
|
||||
}
|
||||
PrevBaseReg = BaseReg;
|
||||
} else
|
||||
PrevBaseReg = 0;
|
||||
}
|
||||
}
|
||||
// This pass just sets some internal MachineMemOperand flags. It can't really
|
||||
// invalidate anything.
|
||||
return false;
|
||||
}
|
lib/Target/ARM64/ARM64Subtarget.cpp (new file, 83 lines)
@ -0,0 +1,83 @@
|
||||
//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM64 specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "ARM64Subtarget.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/MachineScheduler.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#include "ARM64GenSubtargetInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS)
|
||||
: ARM64GenSubtargetInfo(TT, CPU, FS), HasZeroCycleRegMove(false),
|
||||
HasZeroCycleZeroing(false), CPUString(CPU), TargetTriple(TT) {
|
||||
// Determine default and user-specified characteristics
|
||||
|
||||
if (CPUString.empty())
|
||||
// We default to Cyclone for now.
|
||||
CPUString = "cyclone";
|
||||
|
||||
ParseSubtargetFeatures(CPUString, FS);
|
||||
}
|
||||
|
||||
/// ClassifyGlobalReference - Find the target operand flags that describe
|
||||
/// how a global value should be referenced for the current subtarget.
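/// For example, with a non-static relocation model an external or weak global
/// is typically given ARM64II::MO_GOT below, so its address is materialized
/// through the GOT rather than with a direct ADRP+ADD page/offset sequence.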
|
||||
unsigned char
|
||||
ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
|
||||
const TargetMachine &TM) const {
|
||||
|
||||
// Determine whether this is a reference to a definition or a declaration.
|
||||
// Materializable GVs (in JIT lazy compilation mode) do not require an extra
|
||||
// load from stub.
|
||||
bool isDecl = GV->hasAvailableExternallyLinkage();
|
||||
if (GV->isDeclaration() && !GV->isMaterializable())
|
||||
isDecl = true;
|
||||
|
||||
// If symbol visibility is hidden, the extra load is not needed if
|
||||
// the symbol is definitely defined in the current translation unit.
|
||||
if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility() &&
|
||||
(isDecl || GV->isWeakForLinker()))
|
||||
return ARM64II::MO_GOT;
|
||||
|
||||
if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
|
||||
return ARM64II::MO_GOT;
|
||||
|
||||
// FIXME: this will fail on static ELF for weak symbols.
|
||||
return ARM64II::MO_NO_FLAG;
|
||||
}
|
||||
|
||||
/// This function returns the name of a function which has an interface
|
||||
/// like the non-standard bzero function, if such a function exists on
|
||||
/// the current subtarget and it is considered preferable over
|
||||
/// memset with zero passed as the second argument. Otherwise it
|
||||
/// returns null.
|
||||
const char *ARM64Subtarget::getBZeroEntry() const {
|
||||
// At the moment, always prefer bzero.
|
||||
return "bzero";
|
||||
}
|
||||
|
||||
void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
MachineInstr *begin, MachineInstr *end,
|
||||
unsigned NumRegionInstrs) const {
|
||||
// LNT run (at least on Cyclone) showed reasonably significant gains for
|
||||
// bi-directional scheduling (e.g. on 253.perlbmk).
|
||||
Policy.OnlyTopDown = false;
|
||||
Policy.OnlyBottomUp = false;
|
||||
}
|
lib/Target/ARM64/ARM64Subtarget.h (new file, 87 lines)
@ -0,0 +1,87 @@
|
||||
//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the ARM64 specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64SUBTARGET_H
|
||||
#define ARM64SUBTARGET_H
|
||||
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include "ARM64RegisterInfo.h"
|
||||
#include <string>
|
||||
|
||||
#define GET_SUBTARGETINFO_HEADER
|
||||
#include "ARM64GenSubtargetInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
class GlobalValue;
|
||||
class StringRef;
|
||||
|
||||
class ARM64Subtarget : public ARM64GenSubtargetInfo {
|
||||
protected:
|
||||
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
|
||||
bool HasZeroCycleRegMove;
|
||||
|
||||
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
|
||||
bool HasZeroCycleZeroing;
|
||||
|
||||
/// CPUString - String name of used CPU.
|
||||
std::string CPUString;
|
||||
|
||||
/// TargetTriple - What processor and OS we're targeting.
|
||||
Triple TargetTriple;
|
||||
|
||||
public:
|
||||
/// This constructor initializes the data members to match that
|
||||
/// of the specified triple.
|
||||
ARM64Subtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS);
|
||||
|
||||
virtual bool enableMachineScheduler() const { return true; }
|
||||
|
||||
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
|
||||
|
||||
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
|
||||
|
||||
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
|
||||
|
||||
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
|
||||
|
||||
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
|
||||
|
||||
bool isCyclone() const { return CPUString == "cyclone"; }
|
||||
|
||||
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
|
||||
/// that still makes it profitable to inline the call.
|
||||
unsigned getMaxInlineSizeThreshold() const { return 64; }
|
||||
|
||||
/// ParseSubtargetFeatures - Parses features string setting specified
|
||||
/// subtarget options. Definition of function is auto generated by tblgen.
|
||||
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
|
||||
|
||||
/// ClassifyGlobalReference - Find the target operand flags that describe
|
||||
/// how a global value should be referenced for the current subtarget.
|
||||
unsigned char ClassifyGlobalReference(const GlobalValue *GV,
|
||||
const TargetMachine &TM) const;
|
||||
|
||||
/// This function returns the name of a function which has an interface
|
||||
/// like the non-standard bzero function, if such a function exists on
|
||||
/// the current subtarget and it is considered preferable over
|
||||
/// memset with zero passed as the second argument. Otherwise it
|
||||
/// returns null.
|
||||
const char *getBZeroEntry() const;
|
||||
|
||||
void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin,
|
||||
MachineInstr *end, unsigned NumRegionInstrs) const;
|
||||
};
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // ARM64SUBTARGET_H
|
lib/Target/ARM64/ARM64TargetMachine.cpp (new file, 157 lines)
@ -0,0 +1,157 @@
|
||||
//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM64 specific subclass of TargetMachine.
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM64.h"
|
||||
#include "ARM64TargetMachine.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> EnableCCMP("arm64-ccmp",
|
||||
cl::desc("Enable the CCMP formation pass"),
|
||||
cl::init(true));
|
||||
|
||||
static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden,
|
||||
cl::desc("Suppress STP for ARM64"),
|
||||
cl::init(true));
|
||||
|
||||
static cl::opt<bool>
|
||||
EnablePromoteConstant("arm64-promote-const", cl::Hidden,
|
||||
cl::desc("Enable the promote constant pass"),
|
||||
cl::init(true));
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableCollectLOH("arm64-collect-loh", cl::Hidden,
|
||||
cl::desc("Enable the pass that emits the linker"
|
||||
" optimization hints (LOH)"),
|
||||
cl::init(true));
|
||||
|
||||
extern "C" void LLVMInitializeARM64Target() {
|
||||
// Register the target.
|
||||
RegisterTargetMachine<ARM64TargetMachine> X(TheARM64Target);
|
||||
}
|
||||
|
||||
/// TargetMachine ctor - Create an ARM64 architecture model.
|
||||
///
|
||||
ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, CPU, FS),
|
||||
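// Layout string, for reference: "e" little-endian, "m:o"/"m:e" Mach-O vs. ELF
// name mangling, "i64:64"/"i128:128" 64- and 128-bit integer alignment,
// "n32:64" native integer widths, "S128" 128-bit stack alignment.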
DL(Subtarget.isTargetMachO() ? "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
: "e-m:e-i64:64-i128:128-n32:64-S128"),
|
||||
InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
|
||||
TSInfo(*this) {
|
||||
initAsmInfo();
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// ARM64 Code Generator Pass Configuration Options.
|
||||
class ARM64PassConfig : public TargetPassConfig {
|
||||
public:
|
||||
ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
|
||||
ARM64TargetMachine &getARM64TargetMachine() const {
|
||||
return getTM<ARM64TargetMachine>();
|
||||
}
|
||||
|
||||
virtual bool addPreISel();
|
||||
virtual bool addInstSelector();
|
||||
virtual bool addILPOpts();
|
||||
virtual bool addPreRegAlloc();
|
||||
virtual bool addPostRegAlloc();
|
||||
virtual bool addPreSched2();
|
||||
virtual bool addPreEmitPass();
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
|
||||
// Add first the target-independent BasicTTI pass, then our ARM64 pass. This
|
||||
// allows the ARM64 pass to delegate to the target independent layer when
|
||||
// appropriate.
|
||||
PM.add(createBasicTargetTransformInfoPass(this));
|
||||
PM.add(createARM64TargetTransformInfoPass(this));
|
||||
}
|
||||
|
||||
TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
return new ARM64PassConfig(this, PM);
|
||||
}
|
||||
|
||||
// Pass Pipeline Configuration
|
||||
bool ARM64PassConfig::addPreISel() {
|
||||
// Run promote constant before global merge, so that the promoted constants
|
||||
// get a chance to be merged
|
||||
if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
|
||||
addPass(createARM64PromoteConstantPass());
|
||||
if (TM->getOptLevel() != CodeGenOpt::None)
|
||||
addPass(createGlobalMergePass(TM));
|
||||
if (TM->getOptLevel() != CodeGenOpt::None)
|
||||
addPass(createARM64AddressTypePromotionPass());
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ARM64PassConfig::addInstSelector() {
|
||||
addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel()));
|
||||
|
||||
// For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
|
||||
// references to _TLS_MODULE_BASE_ as possible).
|
||||
if (TM->getSubtarget<ARM64Subtarget>().isTargetELF() &&
|
||||
getOptLevel() != CodeGenOpt::None)
|
||||
addPass(createARM64CleanupLocalDynamicTLSPass());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ARM64PassConfig::addILPOpts() {
|
||||
if (EnableCCMP)
|
||||
addPass(createARM64ConditionalCompares());
|
||||
addPass(&EarlyIfConverterID);
|
||||
if (EnableStPairSuppress)
|
||||
addPass(createARM64StorePairSuppressPass());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64PassConfig::addPreRegAlloc() {
|
||||
// Use AdvSIMD scalar instructions whenever profitable.
|
||||
addPass(createARM64AdvSIMDScalar());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64PassConfig::addPostRegAlloc() {
|
||||
// Change dead register definitions to refer to the zero register.
|
||||
addPass(createARM64DeadRegisterDefinitions());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64PassConfig::addPreSched2() {
|
||||
// Expand some pseudo instructions to allow proper scheduling.
|
||||
addPass(createARM64ExpandPseudoPass());
|
||||
// Use load/store pair instructions when possible.
|
||||
addPass(createARM64LoadStoreOptimizationPass());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARM64PassConfig::addPreEmitPass() {
|
||||
// Relax conditional branch instructions if they're otherwise out of
|
||||
// range of their destination.
|
||||
addPass(createARM64BranchRelaxation());
|
||||
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH)
|
||||
addPass(createARM64CollectLOHPass());
|
||||
return true;
|
||||
}
|
lib/Target/ARM64/ARM64TargetMachine.h (new file, 69 lines)
@ -0,0 +1,69 @@
|
||||
//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the ARM64 specific subclass of TargetMachine.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64TARGETMACHINE_H
|
||||
#define ARM64TARGETMACHINE_H
|
||||
|
||||
#include "ARM64InstrInfo.h"
|
||||
#include "ARM64ISelLowering.h"
|
||||
#include "ARM64Subtarget.h"
|
||||
#include "ARM64FrameLowering.h"
|
||||
#include "ARM64SelectionDAGInfo.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class ARM64TargetMachine : public LLVMTargetMachine {
|
||||
protected:
|
||||
ARM64Subtarget Subtarget;
|
||||
|
||||
private:
|
||||
const DataLayout DL;
|
||||
ARM64InstrInfo InstrInfo;
|
||||
ARM64TargetLowering TLInfo;
|
||||
ARM64FrameLowering FrameLowering;
|
||||
ARM64SelectionDAGInfo TSInfo;
|
||||
|
||||
public:
|
||||
ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options, Reloc::Model RM,
|
||||
CodeModel::Model CM, CodeGenOpt::Level OL);
|
||||
|
||||
virtual const ARM64Subtarget *getSubtargetImpl() const { return &Subtarget; }
|
||||
virtual const ARM64TargetLowering *getTargetLowering() const {
|
||||
return &TLInfo;
|
||||
}
|
||||
virtual const DataLayout *getDataLayout() const { return &DL; }
|
||||
virtual const ARM64FrameLowering *getFrameLowering() const {
|
||||
return &FrameLowering;
|
||||
}
|
||||
virtual const ARM64InstrInfo *getInstrInfo() const { return &InstrInfo; }
|
||||
virtual const ARM64RegisterInfo *getRegisterInfo() const {
|
||||
return &InstrInfo.getRegisterInfo();
|
||||
}
|
||||
virtual const ARM64SelectionDAGInfo *getSelectionDAGInfo() const {
|
||||
return &TSInfo;
|
||||
}
|
||||
|
||||
// Pass Pipeline Configuration
|
||||
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
|
||||
|
||||
/// \brief Register ARM64 analysis passes with a pass manager.
|
||||
virtual void addAnalysisPasses(PassManagerBase &PM);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
lib/Target/ARM64/ARM64TargetObjectFile.cpp (new file, 52 lines)
@ -0,0 +1,52 @@
|
||||
//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM64TargetObjectFile.h"
|
||||
#include "ARM64TargetMachine.h"
|
||||
#include "llvm/IR/Mangler.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/Support/Dwarf.h"
|
||||
using namespace llvm;
|
||||
using namespace dwarf;
|
||||
|
||||
void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
|
||||
const TargetMachine &TM) {
|
||||
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
|
||||
InitializeELF(TM.Options.UseInitArray);
|
||||
}
|
||||
|
||||
const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference(
|
||||
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
|
||||
const TargetMachine &TM, MachineModuleInfo *MMI,
|
||||
MCStreamer &Streamer) const {
|
||||
// On Darwin, we can reference dwarf symbols with foo@GOT-., which
|
||||
// is an indirect pc-relative reference. The default implementation
|
||||
// won't reference using the GOT, so we need this target-specific
|
||||
// version.
|
||||
if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
|
||||
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
|
||||
const MCExpr *Res =
|
||||
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
|
||||
MCSymbol *PCSym = getContext().CreateTempSymbol();
|
||||
Streamer.EmitLabel(PCSym);
|
||||
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
|
||||
return MCBinaryExpr::CreateSub(Res, PC, getContext());
|
||||
}
|
||||
|
||||
return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
|
||||
GV, Encoding, Mang, TM, MMI, Streamer);
|
||||
}
|
||||
|
||||
MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol(
|
||||
const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
|
||||
MachineModuleInfo *MMI) const {
|
||||
return TM.getSymbol(GV, Mang);
|
||||
}
|
lib/Target/ARM64/ARM64TargetObjectFile.h (new file, 40 lines)
@ -0,0 +1,40 @@
|
||||
//===-- ARM64TargetObjectFile.h - ARM64 Object Info ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
|
||||
#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
|
||||
|
||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
|
||||
namespace llvm {
|
||||
class ARM64TargetMachine;
|
||||
|
||||
/// This implementation is used for ARM64 ELF targets (Linux in particular).
|
||||
class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
|
||||
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
|
||||
};
|
||||
|
||||
/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
|
||||
class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
|
||||
public:
|
||||
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
|
||||
unsigned Encoding, Mangler &Mang,
|
||||
const TargetMachine &TM,
|
||||
MachineModuleInfo *MMI,
|
||||
MCStreamer &Streamer) const override;
|
||||
|
||||
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
|
||||
const TargetMachine &TM,
|
||||
MachineModuleInfo *MMI) const override;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
lib/Target/ARM64/ARM64TargetTransformInfo.cpp (new file, 326 lines)
@ -0,0 +1,326 @@
|
||||
//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
/// This file implements a TargetTransformInfo analysis pass specific to the
|
||||
/// ARM64 target machine. It uses the target's detailed information to provide
|
||||
/// more precise answers to certain TTI queries, while letting the target
|
||||
/// independent and default TTI implementations handle the rest.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "arm64tti"
|
||||
#include "ARM64.h"
|
||||
#include "ARM64TargetMachine.h"
|
||||
#include "MCTargetDesc/ARM64AddressingModes.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/CostTable.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
using namespace llvm;
|
||||
|
||||
// Declare the pass initialization routine locally as target-specific passes
|
||||
// don't have a target-wide initialization entry point, and so we rely on the
|
||||
// pass constructor initialization.
|
||||
namespace llvm {
|
||||
void initializeARM64TTIPass(PassRegistry &);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class ARM64TTI final : public ImmutablePass, public TargetTransformInfo {
|
||||
const ARM64TargetMachine *TM;
|
||||
const ARM64Subtarget *ST;
|
||||
const ARM64TargetLowering *TLI;
|
||||
|
||||
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
|
||||
/// are set if the result needs to be inserted and/or extracted from vectors.
|
||||
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
|
||||
|
||||
public:
|
||||
ARM64TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
|
||||
llvm_unreachable("This pass cannot be directly constructed");
|
||||
}
|
||||
|
||||
ARM64TTI(const ARM64TargetMachine *TM)
|
||||
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
|
||||
TLI(TM->getTargetLowering()) {
|
||||
initializeARM64TTIPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
void initializePass() override { pushTTIStack(this); }
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
TargetTransformInfo::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
/// Pass identification.
|
||||
static char ID;
|
||||
|
||||
/// Provide necessary pointer adjustments for the two base classes.
|
||||
void *getAdjustedAnalysisPointer(const void *ID) override {
|
||||
if (ID == &TargetTransformInfo::ID)
|
||||
return (TargetTransformInfo *)this;
|
||||
return this;
|
||||
}
|
||||
|
||||
/// \name Scalar TTI Implementations
|
||||
/// @{
|
||||
|
||||
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
|
||||
PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
|
||||
|
||||
/// @}
|
||||
|
||||
/// \name Vector TTI Implementations
|
||||
/// @{
|
||||
|
||||
unsigned getNumberOfRegisters(bool Vector) const override {
|
||||
if (Vector)
|
||||
return 32;
|
||||
|
||||
return 31;
|
||||
}
|
||||
|
||||
unsigned getRegisterBitWidth(bool Vector) const override {
|
||||
if (Vector)
|
||||
return 128;
|
||||
|
||||
return 64;
|
||||
}
|
||||
|
||||
unsigned getMaximumUnrollFactor() const override { return 2; }
|
||||
|
||||
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
|
||||
override;
|
||||
|
||||
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
|
||||
override;
|
||||
|
||||
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
|
||||
OperandValueKind Opd1Info = OK_AnyValue,
|
||||
OperandValueKind Opd2Info = OK_AnyValue) const
|
||||
override;
|
||||
|
||||
unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
|
||||
|
||||
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
|
||||
override;
|
||||
|
||||
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||
unsigned AddressSpace) const override;
|
||||
/// @}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti",
|
||||
"ARM64 Target Transform Info", true, true, false)
|
||||
char ARM64TTI::ID = 0;
|
||||
|
||||
ImmutablePass *
|
||||
llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) {
|
||||
return new ARM64TTI(TM);
|
||||
}
|
||||
|
||||
unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
|
||||
assert(Ty->isIntegerTy());
|
||||
|
||||
unsigned BitSize = Ty->getPrimitiveSizeInBits();
|
||||
if (BitSize == 0)
|
||||
return ~0U;
|
||||
|
||||
int64_t Val = Imm.getSExtValue();
|
||||
if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
|
||||
return 1;
|
||||
|
||||
if ((int64_t)Val < 0)
|
||||
Val = ~Val;
|
||||
if (BitSize == 32)
|
||||
Val &= (1LL << 32) - 1;
|
||||
|
||||
unsigned LZ = countLeadingZeros((uint64_t)Val);
|
||||
unsigned Shift = (63 - LZ) / 16;
|
||||
// MOVZ is free, so the cost is the number of MOVKs required (minimum of 1).
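// For example, Imm = 0x00123456789ABCDE has 11 leading zero bits, so
// Shift = (63 - 11) / 16 = 3 and the returned cost is 3 (three MOVKs on top
// of the free MOVZ).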
|
||||
return (Shift == 0) ? 1 : Shift;
|
||||
}
|
||||
|
||||
ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const {
|
||||
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
|
||||
if (TyWidth == 32 || TyWidth == 64)
|
||||
return PSK_FastHardware;
|
||||
// TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount.
|
||||
return PSK_Software;
|
||||
}
|
||||
|
||||
unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
Type *Src) const {
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
assert(ISD && "Invalid opcode");
|
||||
|
||||
EVT SrcTy = TLI->getValueType(Src);
|
||||
EVT DstTy = TLI->getValueType(Dst);
|
||||
|
||||
if (!SrcTy.isSimple() || !DstTy.isSimple())
|
||||
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
|
||||
|
||||
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
|
||||
// LowerVectorINT_TO_FP:
|
||||
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
|
||||
// LowerVectorFP_TO_INT
|
||||
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
|
||||
};
|
||||
|
||||
int Idx = ConvertCostTableLookup<MVT>(
|
||||
ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
|
||||
SrcTy.getSimpleVT());
|
||||
if (Idx != -1)
|
||||
return ConversionTbl[Idx].Cost;
|
||||
|
||||
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
|
||||
}
|
||||
|
||||
unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||
unsigned Index) const {
|
||||
assert(Val->isVectorTy() && "This must be a vector type");
|
||||
|
||||
if (Index != -1U) {
|
||||
// Legalize the type.
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
|
||||
|
||||
// This type is legalized to a scalar type.
|
||||
if (!LT.second.isVector())
|
||||
return 0;
|
||||
|
||||
// The type may be split. Normalize the index to the new type.
|
||||
unsigned Width = LT.second.getVectorNumElements();
|
||||
Index = Index % Width;
|
||||
|
||||
// The element at index zero is already inside the vector.
|
||||
if (Index == 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// All other insert/extracts cost this much.
|
||||
return 2;
|
||||
}
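// Worked example (illustrative, not part of the original commit): extracting
// element 2 of a <4 x i64> value legalizes to two v2i64 halves, so the index
// normalizes to 2 % 2 == 0 and the extract is treated as free; extracting
// element 1 of a <2 x i64> keeps Index == 1 and costs 2 like any other lane.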
|
||||
|
||||
unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
|
||||
OperandValueKind Opd1Info,
|
||||
OperandValueKind Opd2Info) const {
|
||||
// Legalize the type.
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
|
||||
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
|
||||
switch (ISD) {
|
||||
default:
|
||||
return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
|
||||
Opd2Info);
|
||||
case ISD::ADD:
|
||||
case ISD::MUL:
|
||||
case ISD::XOR:
|
||||
case ISD::OR:
|
||||
case ISD::AND:
|
||||
// These nodes are marked as 'custom' for combining purposes only.
|
||||
// We know that they are legal. See LowerAdd in ISelLowering.
|
||||
return 1 * LT.first;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
|
||||
// Address computations in vectorized code with non-consecutive addresses will
|
||||
// likely result in more instructions compared to scalar code where the
|
||||
// computation can more often be merged into the index mode. The resulting
|
||||
// extra micro-ops can significantly decrease throughput.
|
||||
unsigned NumVectorInstToHideOverhead = 10;
|
||||
|
||||
if (Ty->isVectorTy() && IsComplex)
|
||||
return NumVectorInstToHideOverhead;
|
||||
|
||||
// In many cases the address computation is not merged into the instruction
|
||||
// addressing mode.
|
||||
return 1;
|
||||
}
|
||||
|
||||
unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
Type *CondTy) const {
|
||||
|
||||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
// We don't lower vector selects well that are wider than the register width.
|
||||
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
|
||||
// We would need this many instructions to hide the scalarization happening.
|
||||
unsigned AmortizationCost = 20;
|
||||
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
|
||||
VectorSelectTbl[] = {
|
||||
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
|
||||
{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
|
||||
{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
|
||||
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
|
||||
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
|
||||
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
|
||||
};
|
||||
|
||||
EVT SelCondTy = TLI->getValueType(CondTy);
|
||||
EVT SelValTy = TLI->getValueType(ValTy);
|
||||
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
|
||||
int Idx =
|
||||
ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
|
||||
SelValTy.getSimpleVT());
|
||||
if (Idx != -1)
|
||||
return VectorSelectTbl[Idx].Cost;
|
||||
}
|
||||
}
|
||||
return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
|
||||
}
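// Worked example (illustrative, not part of the original commit): a select
// whose condition is <16 x i1> and whose value type is <16 x i32> hits the
// table above and is priced at 16 * 20 == 320, steering the vectorizers away
// from wide selects that would have to be scalarized.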
|
||||
|
||||
unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
unsigned Alignment,
|
||||
unsigned AddressSpace) const {
|
||||
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
|
||||
|
||||
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
|
||||
Src->getVectorElementType()->isIntegerTy(64)) {
|
||||
// Unaligned stores are extremely inefficient. We don't split
|
||||
// unaligned v2i64 stores because the negative impact that has shown in
|
||||
// practice on inlined memcpy code.
|
||||
// We make v2i64 stores expensive so that we will only vectorize if there
|
||||
// are 6 other instructions getting vectorized.
|
||||
unsigned AmortizationCost = 6;
|
||||
|
||||
return LT.first * 2 * AmortizationCost;
|
||||
}
|
||||
|
||||
if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
|
||||
Src->getVectorNumElements() < 8) {
|
||||
// We scalarize the loads/stores because there is not v.4b register and we
|
||||
// have to promote the elements to v.4h.
|
||||
unsigned NumVecElts = Src->getVectorNumElements();
|
||||
unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
|
||||
// We generate 2 instructions per vector element.
|
||||
return NumVectorizableInstsToAmortize * NumVecElts * 2;
|
||||
}
|
||||
|
||||
return LT.first;
|
||||
}
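// Worked example (illustrative, not part of the original commit): an unaligned
// store of <2 x i64> legalizes to a single v2i64 (LT.first == 1), so it is
// priced at 1 * 2 * 6 == 12; a load or store of <4 x i8> is priced at
// (4 * 2) * 4 * 2 == 64, reflecting the per-element scalarization through
// v.4h described in the comment above.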
|
4832  lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp (new file; diff too large, not shown)
6  lib/Target/ARM64/AsmParser/CMakeLists.txt (new file)
@ -0,0 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )

add_llvm_library(LLVMARM64AsmParser
  ARM64AsmParser.cpp
  )
24  lib/Target/ARM64/AsmParser/LLVMBuild.txt (new file)
@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = ARM64AsmParser
parent = ARM64
required_libraries = ARM64Desc ARM64Info MC MCParser Support
add_to_library_groups = ARM64
15  lib/Target/ARM64/AsmParser/Makefile (new file)
@ -0,0 +1,15 @@
##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMARM64AsmParser

# Hack: we need to include 'main' ARM target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
50  lib/Target/ARM64/CMakeLists.txt (new file)
@ -0,0 +1,50 @@
set(LLVM_TARGET_DEFINITIONS ARM64.td)

tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel)
tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv)
tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARM64CommonTableGen)

add_llvm_target(ARM64CodeGen
  ARM64AddressTypePromotion.cpp
  ARM64AdvSIMDScalarPass.cpp
  ARM64AsmPrinter.cpp
  ARM64BranchRelaxation.cpp
  ARM64CleanupLocalDynamicTLSPass.cpp
  ARM64CollectLOH.cpp
  ARM64ConditionalCompares.cpp
  ARM64DeadRegisterDefinitionsPass.cpp
  ARM64ExpandPseudoInsts.cpp
  ARM64FastISel.cpp
  ARM64FrameLowering.cpp
  ARM64ISelDAGToDAG.cpp
  ARM64ISelLowering.cpp
  ARM64InstrInfo.cpp
  ARM64LoadStoreOptimizer.cpp
  ARM64MCInstLower.cpp
  ARM64PromoteConstant.cpp
  ARM64RegisterInfo.cpp
  ARM64SelectionDAGInfo.cpp
  ARM64StorePairSuppress.cpp
  ARM64Subtarget.cpp
  ARM64TargetMachine.cpp
  ARM64TargetObjectFile.cpp
  ARM64TargetTransformInfo.cpp
  )

add_dependencies(LLVMARM64CodeGen intrinsics_gen)

add_subdirectory(TargetInfo)
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
2142  lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp (new file; diff too large, not shown)
54  lib/Target/ARM64/Disassembler/ARM64Disassembler.h (new file)
@ -0,0 +1,54 @@
//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#ifndef ARM64DISASSEMBLER_H
#define ARM64DISASSEMBLER_H

#include "llvm/MC/MCDisassembler.h"

namespace llvm {

class MCInst;
class MemoryObject;
class raw_ostream;

class ARM64Disassembler : public MCDisassembler {
public:
  ARM64Disassembler(const MCSubtargetInfo &STI) : MCDisassembler(STI) {}

  ~ARM64Disassembler() {}

  /// getInstruction - See MCDisassembler.
  MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
                                              const MemoryObject &region,
                                              uint64_t address,
                                              raw_ostream &vStream,
                                              raw_ostream &cStream) const;

  /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
  /// operand in place of the immediate Value in the MCInst. The immediate
  /// Value has not had any PC adjustment made by the caller. If the instruction
  /// adds the PC to the immediate Value then InstsAddsAddressToValue is true,
  /// else false. If the getOpInfo() function was set as part of the
  /// setupForSymbolicDisassembly() call then that function is called to get any
  /// symbolic information at the Address for this instruction. If that returns
  /// non-zero then the symbolic information it returns is used to create an
  /// MCExpr and that is added as an operand to the MCInst. This function
  /// returns true if it adds an operand to the MCInst and false otherwise.
  bool tryAddingSymbolicOperand(uint64_t Address, int Value,
                                bool InstsAddsAddressToValue, uint64_t InstSize,
                                MCInst &MI, uint32_t insn = 0) const;
};

} // namespace llvm

#endif
13  lib/Target/ARM64/Disassembler/CMakeLists.txt (new file)
@ -0,0 +1,13 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )

add_llvm_library(LLVMARM64Disassembler
  ARM64Disassembler.cpp
  )
# workaround for hanging compilation on MSVC8, 9 and 10
#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
#set_property(
# SOURCE ARMDisassembler.cpp
# PROPERTY COMPILE_FLAGS "/Od"
# )
#endif()
add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen)
24  lib/Target/ARM64/Disassembler/LLVMBuild.txt (new file)
@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = ARM64Disassembler
parent = ARM64
required_libraries = ARM64Desc ARM64Info MC Support
add_to_library_groups = ARM64
16  lib/Target/ARM64/Disassembler/Makefile (new file)
@ -0,0 +1,16 @@
##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

LEVEL = ../../../..
LIBRARYNAME = LLVMARM64Disassembler

# Hack: we need to include 'main' arm target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
1428  lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp (new file; diff too large, not shown)
157  lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h (new file)
@ -0,0 +1,157 @@
|
||||
//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This class prints an ARM64 MCInst to a .s file.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64INSTPRINTER_H
|
||||
#define ARM64INSTPRINTER_H
|
||||
|
||||
#include "MCTargetDesc/ARM64MCTargetDesc.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MCOperand;
|
||||
|
||||
class ARM64InstPrinter : public MCInstPrinter {
|
||||
public:
|
||||
ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
|
||||
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
|
||||
|
||||
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
|
||||
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
|
||||
|
||||
// Autogenerated by tblgen.
|
||||
virtual void printInstruction(const MCInst *MI, raw_ostream &O);
|
||||
virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
|
||||
virtual StringRef getRegName(unsigned RegNo) const {
|
||||
return getRegisterName(RegNo);
|
||||
}
|
||||
static const char *getRegisterName(unsigned RegNo,
|
||||
unsigned AltIdx = ARM64::NoRegAltName);
|
||||
|
||||
protected:
|
||||
bool printSysAlias(const MCInst *MI, raw_ostream &O);
|
||||
// Operand printers
|
||||
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
|
||||
raw_ostream &O);
|
||||
void printPostIncOperand1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand2(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand3(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand4(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand6(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand8(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand12(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand16(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand24(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand32(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand48(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printPostIncOperand64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printDotCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printAlignedBranchTarget(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale,
|
||||
raw_ostream &O);
|
||||
void printAMIndexed128(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
|
||||
printAMIndexed(MI, OpNum, 16, O);
|
||||
}
|
||||
|
||||
void printAMIndexed64(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
|
||||
printAMIndexed(MI, OpNum, 8, O);
|
||||
}
|
||||
|
||||
void printAMIndexed32(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
|
||||
printAMIndexed(MI, OpNum, 4, O);
|
||||
}
|
||||
|
||||
void printAMIndexed16(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
|
||||
printAMIndexed(MI, OpNum, 2, O);
|
||||
}
|
||||
|
||||
void printAMIndexed8(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
|
||||
printAMIndexed(MI, OpNum, 1, O);
|
||||
}
|
||||
void printAMUnscaled(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
|
||||
printAMIndexed(MI, OpNum, 1, O);
|
||||
}
|
||||
void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printImmScale4(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printImmScale8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printImmScale16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printMemoryPostIndexed32(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
void printMemoryPostIndexed64(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
void printMemoryPostIndexed128(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O,
|
||||
int LegalShiftAmt);
|
||||
void printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printMemoryRegOffset128(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
|
||||
void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
|
||||
void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
|
||||
StringRef LayoutSuffix);
|
||||
|
||||
/// Print a list of vector registers where the type suffix is implicit
|
||||
/// (i.e. attached to the instruction rather than the registers).
|
||||
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O);
|
||||
|
||||
template <unsigned NumLanes, char LaneKind>
|
||||
void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
|
||||
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printSystemCPSRField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
|
||||
};
|
||||
|
||||
class ARM64AppleInstPrinter : public ARM64InstPrinter {
|
||||
public:
|
||||
ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
|
||||
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
|
||||
|
||||
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
|
||||
|
||||
virtual void printInstruction(const MCInst *MI, raw_ostream &O);
|
||||
virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
|
||||
virtual StringRef getRegName(unsigned RegNo) const {
|
||||
return getRegisterName(RegNo);
|
||||
}
|
||||
static const char *getRegisterName(unsigned RegNo,
|
||||
unsigned AltIdx = ARM64::NoRegAltName);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
7  lib/Target/ARM64/InstPrinter/CMakeLists.txt (new file)
@ -0,0 +1,7 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )

add_llvm_library(LLVMARM64AsmPrinter
  ARM64InstPrinter.cpp
  )

add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen)
24  lib/Target/ARM64/InstPrinter/LLVMBuild.txt (new file)
@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = ARM64AsmPrinter
parent = ARM64
required_libraries = MC Support
add_to_library_groups = ARM64
15  lib/Target/ARM64/InstPrinter/Makefile (new file)
@ -0,0 +1,15 @@
##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMARM64AsmPrinter

# Hack: we need to include 'main' arm target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
36  lib/Target/ARM64/LLVMBuild.txt (new file)
@ -0,0 +1,36 @@
;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[common]
subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo

[component_0]
type = TargetGroup
name = ARM64
parent = Target
has_asmparser = 1
has_asmprinter = 1
has_disassembler = 1
has_jit = 1

[component_1]
type = Library
name = ARM64CodeGen
parent = ARM64
required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = ARM64
759  lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h (new file)
@ -0,0 +1,759 @@
|
||||
//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the ARM64 addressing mode implementation stuff.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
|
||||
#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
|
||||
|
||||
#include "llvm/ADT/APFloat.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// ARM64_AM - ARM64 Addressing Mode Stuff
|
||||
namespace ARM64_AM {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Shifts
|
||||
//
|
||||
|
||||
enum ShiftType {
|
||||
InvalidShift = -1,
|
||||
LSL = 0,
|
||||
LSR = 1,
|
||||
ASR = 2,
|
||||
ROR = 3,
|
||||
MSL = 4
|
||||
};
|
||||
|
||||
/// getShiftName - Get the string encoding for the shift type.
|
||||
static inline const char *getShiftName(ARM64_AM::ShiftType ST) {
|
||||
switch (ST) {
|
||||
default: assert(false && "unhandled shift type!");
|
||||
case ARM64_AM::LSL: return "lsl";
|
||||
case ARM64_AM::LSR: return "lsr";
|
||||
case ARM64_AM::ASR: return "asr";
|
||||
case ARM64_AM::ROR: return "ror";
|
||||
case ARM64_AM::MSL: return "msl";
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getShiftType - Extract the shift type.
|
||||
static inline ARM64_AM::ShiftType getShiftType(unsigned Imm) {
|
||||
return ARM64_AM::ShiftType((Imm >> 6) & 0x7);
|
||||
}
|
||||
|
||||
/// getShiftValue - Extract the shift value.
|
||||
static inline unsigned getShiftValue(unsigned Imm) {
|
||||
return Imm & 0x3f;
|
||||
}
|
||||
|
||||
/// getShifterImm - Encode the shift type and amount:
|
||||
/// imm: 6-bit shift amount
|
||||
/// shifter: 000 ==> lsl
|
||||
/// 001 ==> lsr
|
||||
/// 010 ==> asr
|
||||
/// 011 ==> ror
|
||||
/// 100 ==> msl
|
||||
/// {8-6} = shifter
|
||||
/// {5-0} = imm
|
||||
static inline unsigned getShifterImm(ARM64_AM::ShiftType ST, unsigned Imm) {
|
||||
assert((Imm & 0x3f) == Imm && "Illegal shifted immedate value!");
|
||||
return (unsigned(ST) << 6) | (Imm & 0x3f);
|
||||
}
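// Round-trip sketch (illustrative, not part of the imported header): the
// helpers above pack a shift kind and amount into one operand value and
// unpack it again, e.g.:
//   unsigned Packed = ARM64_AM::getShifterImm(ARM64_AM::LSL, 12);
//   assert(Packed == ((0u << 6) | 12));
//   assert(ARM64_AM::getShiftType(Packed) == ARM64_AM::LSL);
//   assert(ARM64_AM::getShiftValue(Packed) == 12);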
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Extends
|
||||
//
|
||||
|
||||
enum ExtendType {
|
||||
InvalidExtend = -1,
|
||||
UXTB = 0,
|
||||
UXTH = 1,
|
||||
UXTW = 2,
|
||||
UXTX = 3,
|
||||
SXTB = 4,
|
||||
SXTH = 5,
|
||||
SXTW = 6,
|
||||
SXTX = 7
|
||||
};
|
||||
|
||||
/// getExtendName - Get the string encoding for the extend type.
|
||||
static inline const char *getExtendName(ARM64_AM::ExtendType ET) {
|
||||
switch (ET) {
|
||||
default: assert(false && "unhandled extend type!");
|
||||
case ARM64_AM::UXTB: return "uxtb";
|
||||
case ARM64_AM::UXTH: return "uxth";
|
||||
case ARM64_AM::UXTW: return "uxtw";
|
||||
case ARM64_AM::UXTX: return "uxtx";
|
||||
case ARM64_AM::SXTB: return "sxtb";
|
||||
case ARM64_AM::SXTH: return "sxth";
|
||||
case ARM64_AM::SXTW: return "sxtw";
|
||||
case ARM64_AM::SXTX: return "sxtx";
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getArithShiftValue - get the arithmetic shift value.
|
||||
static inline unsigned getArithShiftValue(unsigned Imm) {
|
||||
return Imm & 0x7;
|
||||
}
|
||||
|
||||
/// getExtendType - Extract the extend type for operands of arithmetic ops.
|
||||
static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) {
|
||||
return ARM64_AM::ExtendType((Imm >> 3) & 0x7);
|
||||
}
|
||||
|
||||
/// getArithExtendImm - Encode the extend type and shift amount for an
|
||||
/// arithmetic instruction:
|
||||
/// imm: 3-bit extend amount
|
||||
/// shifter: 000 ==> uxtb
|
||||
/// 001 ==> uxth
|
||||
/// 010 ==> uxtw
|
||||
/// 011 ==> uxtx
|
||||
/// 100 ==> sxtb
|
||||
/// 101 ==> sxth
|
||||
/// 110 ==> sxtw
|
||||
/// 111 ==> sxtx
|
||||
/// {5-3} = shifter
|
||||
/// {2-0} = imm3
|
||||
static inline unsigned getArithExtendImm(ARM64_AM::ExtendType ET,
|
||||
unsigned Imm) {
|
||||
assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!");
|
||||
return (unsigned(ET) << 3) | (Imm & 0x7);
|
||||
}
|
||||
|
||||
/// getMemDoShift - Extract the "do shift" flag value for load/store
|
||||
/// instructions.
|
||||
static inline bool getMemDoShift(unsigned Imm) {
|
||||
return (Imm & 0x1) != 0;
|
||||
}
|
||||
|
||||
/// getExtendType - Extract the extend type for the offset operand of
|
||||
/// loads/stores.
|
||||
static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) {
|
||||
return ARM64_AM::ExtendType((Imm >> 1) & 0x7);
|
||||
}
|
||||
|
||||
/// getExtendImm - Encode the extend type and amount for a load/store inst:
|
||||
/// imm: 3-bit extend amount
|
||||
/// shifter: 000 ==> uxtb
|
||||
/// 001 ==> uxth
|
||||
/// 010 ==> uxtw
|
||||
/// 011 ==> uxtx
|
||||
/// 100 ==> sxtb
|
||||
/// 101 ==> sxth
|
||||
/// 110 ==> sxtw
|
||||
/// 111 ==> sxtx
|
||||
/// {3-1} = shifter
|
||||
/// {0} = imm3
|
||||
static inline unsigned getMemExtendImm(ARM64_AM::ExtendType ET, bool Imm) {
|
||||
assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!");
|
||||
return (unsigned(ET) << 1) | (Imm & 0x7);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Prefetch
|
||||
//
|
||||
|
||||
/// Pre-fetch operator names.
|
||||
/// The enum values match the encoding values:
|
||||
/// prfop<4:3> 00=preload data, 10=prepare for store
|
||||
/// prfop<2:1> 00=target L1 cache, 01=target L2 cache, 10=target L3 cache,
|
||||
/// prfop<0> 0=non-streaming (temporal), 1=streaming (non-temporal)
|
||||
enum PrefetchOp {
|
||||
InvalidPrefetchOp = -1,
|
||||
PLDL1KEEP = 0x00,
|
||||
PLDL1STRM = 0x01,
|
||||
PLDL2KEEP = 0x02,
|
||||
PLDL2STRM = 0x03,
|
||||
PLDL3KEEP = 0x04,
|
||||
PLDL3STRM = 0x05,
|
||||
PSTL1KEEP = 0x10,
|
||||
PSTL1STRM = 0x11,
|
||||
PSTL2KEEP = 0x12,
|
||||
PSTL2STRM = 0x13,
|
||||
PSTL3KEEP = 0x14,
|
||||
PSTL3STRM = 0x15
|
||||
};
|
||||
|
||||
/// isNamedPrefetchOp - Check if the prefetch-op 5-bit value has a name.
|
||||
static inline bool isNamedPrefetchOp(unsigned prfop) {
|
||||
switch (prfop) {
|
||||
default: return false;
|
||||
case ARM64_AM::PLDL1KEEP: case ARM64_AM::PLDL1STRM: case ARM64_AM::PLDL2KEEP:
|
||||
case ARM64_AM::PLDL2STRM: case ARM64_AM::PLDL3KEEP: case ARM64_AM::PLDL3STRM:
|
||||
case ARM64_AM::PSTL1KEEP: case ARM64_AM::PSTL1STRM: case ARM64_AM::PSTL2KEEP:
|
||||
case ARM64_AM::PSTL2STRM: case ARM64_AM::PSTL3KEEP: case ARM64_AM::PSTL3STRM:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// getPrefetchOpName - Get the string encoding for the prefetch operator.
|
||||
static inline const char *getPrefetchOpName(ARM64_AM::PrefetchOp prfop) {
|
||||
switch (prfop) {
|
||||
default: assert(false && "unhandled prefetch-op type!");
|
||||
case ARM64_AM::PLDL1KEEP: return "pldl1keep";
|
||||
case ARM64_AM::PLDL1STRM: return "pldl1strm";
|
||||
case ARM64_AM::PLDL2KEEP: return "pldl2keep";
|
||||
case ARM64_AM::PLDL2STRM: return "pldl2strm";
|
||||
case ARM64_AM::PLDL3KEEP: return "pldl3keep";
|
||||
case ARM64_AM::PLDL3STRM: return "pldl3strm";
|
||||
case ARM64_AM::PSTL1KEEP: return "pstl1keep";
|
||||
case ARM64_AM::PSTL1STRM: return "pstl1strm";
|
||||
case ARM64_AM::PSTL2KEEP: return "pstl2keep";
|
||||
case ARM64_AM::PSTL2STRM: return "pstl2strm";
|
||||
case ARM64_AM::PSTL3KEEP: return "pstl3keep";
|
||||
case ARM64_AM::PSTL3STRM: return "pstl3strm";
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline uint64_t ror(uint64_t elt, unsigned size) {
|
||||
return ((elt & 1) << (size-1)) | (elt >> 1);
|
||||
}
|
||||
|
||||
/// processLogicalImmediate - Determine if an immediate value can be encoded
|
||||
/// as the immediate operand of a logical instruction for the given register
|
||||
/// size. If so, return true with "encoding" set to the encoded value in
|
||||
/// the form N:immr:imms.
|
||||
static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize,
|
||||
uint64_t &encoding) {
|
||||
if (imm == 0ULL || imm == ~0ULL ||
|
||||
(regSize != 64 && (imm >> regSize != 0 || imm == ~0U)))
|
||||
return false;
|
||||
|
||||
unsigned size = 2;
|
||||
uint64_t eltVal = imm;
|
||||
|
||||
// First, determine the element size.
|
||||
while (size < regSize) {
|
||||
unsigned numElts = regSize / size;
|
||||
unsigned mask = (1ULL << size) - 1;
|
||||
uint64_t lowestEltVal = imm & mask;
|
||||
|
||||
bool allMatched = true;
|
||||
for (unsigned i = 1; i < numElts; ++i) {
|
||||
uint64_t currEltVal = (imm >> (i*size)) & mask;
|
||||
if (currEltVal != lowestEltVal) {
|
||||
allMatched = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (allMatched) {
|
||||
eltVal = lowestEltVal;
|
||||
break;
|
||||
}
|
||||
|
||||
size *= 2;
|
||||
}
|
||||
|
||||
// Second, determine the rotation to make the element be: 0^m 1^n.
|
||||
for (unsigned i = 0; i < size; ++i) {
|
||||
eltVal = ror(eltVal, size);
|
||||
uint32_t clz = countLeadingZeros(eltVal) - (64 - size);
|
||||
uint32_t cto = CountTrailingOnes_64(eltVal);
|
||||
|
||||
if (clz + cto == size) {
|
||||
// Encode in immr the number of RORs it would take to get *from* this
|
||||
// element value to our target value, where i+1 is the number of RORs
|
||||
// to go the opposite direction.
|
||||
unsigned immr = size - (i + 1);
|
||||
|
||||
// If size has a 1 in the n'th bit, create a value that has zeroes in
|
||||
// bits [0, n] and ones above that.
|
||||
uint64_t nimms = ~(size-1) << 1;
|
||||
|
||||
// Or the CTO value into the low bits, which must be below the Nth bit
|
||||
// bit mentioned above.
|
||||
nimms |= (cto-1);
|
||||
|
||||
// Extract the seventh bit and toggle it to create the N field.
|
||||
unsigned N = ((nimms >> 6) & 1) ^ 1;
|
||||
|
||||
encoding = (N << 12) | (immr << 6) | (nimms & 0x3f);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isLogicalImmediate - Return true if the immediate is valid for a logical
|
||||
/// immediate instruction of the given register size. Return false otherwise.
|
||||
static inline bool isLogicalImmediate(uint64_t imm, unsigned regSize) {
|
||||
uint64_t encoding;
|
||||
return processLogicalImmediate(imm, regSize, encoding);
|
||||
}
|
||||
|
||||
/// encodeLogicalImmediate - Return the encoded immediate value for a logical
|
||||
/// immediate instruction of the given register size.
|
||||
static inline uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize) {
|
||||
uint64_t encoding = 0;
|
||||
bool res = processLogicalImmediate(imm, regSize, encoding);
|
||||
assert(res && "invalid logical immediate");
|
||||
(void)res;
|
||||
return encoding;
|
||||
}
|
||||
|
||||
/// decodeLogicalImmediate - Decode a logical immediate value in the form
|
||||
/// "N:immr:imms" (where the immr and imms fields are each 6 bits) into the
|
||||
/// integer value it represents with regSize bits.
|
||||
static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) {
|
||||
// Extract the N, imms, and immr fields.
|
||||
unsigned N = (val >> 12) & 1;
|
||||
unsigned immr = (val >> 6) & 0x3f;
|
||||
unsigned imms = val & 0x3f;
|
||||
|
||||
assert((regSize == 64 || N == 0) && "undefined logical immediate encoding");
|
||||
int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
|
||||
assert(len >= 0 && "undefined logical immediate encoding");
|
||||
unsigned size = (1 << len);
|
||||
unsigned R = immr & (size - 1);
|
||||
unsigned S = imms & (size - 1);
|
||||
assert(S != size - 1 && "undefined logical immediate encoding");
|
||||
uint64_t pattern = (1ULL << (S + 1)) - 1;
|
||||
for (unsigned i = 0; i < R; ++i)
|
||||
pattern = ror(pattern, size);
|
||||
|
||||
// Replicate the pattern to fill the regSize.
|
||||
while (size != regSize) {
|
||||
pattern |= (pattern << size);
|
||||
size *= 2;
|
||||
}
|
||||
return pattern;
|
||||
}
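// Round-trip sketch (illustrative, not part of the imported header): a value
// made of a repeating run of ones is a valid logical immediate, and the
// encode/decode helpers above are inverses on it, e.g.:
//   uint64_t Imm = 0x00ff00ff00ff00ffULL;   // 16-bit element 0x00ff, repeated
//   assert(ARM64_AM::isLogicalImmediate(Imm, 64));
//   uint64_t Enc = ARM64_AM::encodeLogicalImmediate(Imm, 64);
//   assert(ARM64_AM::decodeLogicalImmediate(Enc, 64) == Imm);
// A value such as 0x1234567800000000ULL has no rotated-run form, so
// isLogicalImmediate() returns false for it.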
|
||||
|
||||
/// isValidDecodeLogicalImmediate - Check to see if the logical immediate value
|
||||
/// in the form "N:immr:imms" (where the immr and imms fields are each 6 bits)
|
||||
/// is a valid encoding for an integer value with regSize bits.
|
||||
static inline bool isValidDecodeLogicalImmediate(uint64_t val,
|
||||
unsigned regSize) {
|
||||
// Extract the N and imms fields needed for checking.
|
||||
unsigned N = (val >> 12) & 1;
|
||||
unsigned imms = val & 0x3f;
|
||||
|
||||
if (regSize == 32 && N != 0) // undefined logical immediate encoding
|
||||
return false;
|
||||
int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
|
||||
if (len < 0) // undefined logical immediate encoding
|
||||
return false;
|
||||
unsigned size = (1 << len);
|
||||
unsigned S = imms & (size - 1);
|
||||
if (S == size - 1) // undefined logical immediate encoding
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating-point Immediates
|
||||
//
|
||||
static inline float getFPImmFloat(unsigned Imm) {
|
||||
// We expect an 8-bit binary encoding of a floating-point number here.
|
||||
union {
|
||||
uint32_t I;
|
||||
float F;
|
||||
} FPUnion;
|
||||
|
||||
uint8_t Sign = (Imm >> 7) & 0x1;
|
||||
uint8_t Exp = (Imm >> 4) & 0x7;
|
||||
uint8_t Mantissa = Imm & 0xf;
|
||||
|
||||
// 8-bit FP iEEEE Float Encoding
|
||||
// abcd efgh aBbbbbbc defgh000 00000000 00000000
|
||||
//
|
||||
// where B = NOT(b);
|
||||
|
||||
FPUnion.I = 0;
|
||||
FPUnion.I |= Sign << 31;
|
||||
FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
|
||||
FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
|
||||
FPUnion.I |= (Exp & 0x3) << 23;
|
||||
FPUnion.I |= Mantissa << 19;
|
||||
return FPUnion.F;
|
||||
}
|
||||
|
||||
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
|
||||
/// floating-point value. If the value cannot be represented as an 8-bit
|
||||
/// floating-point value, then return -1.
|
||||
static inline int getFP32Imm(const APInt &Imm) {
|
||||
uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
|
||||
int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
|
||||
int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
|
||||
|
||||
// We can handle 4 bits of mantissa.
|
||||
// mantissa = (16+UInt(e:f:g:h))/16.
|
||||
if (Mantissa & 0x7ffff)
|
||||
return -1;
|
||||
Mantissa >>= 19;
|
||||
if ((Mantissa & 0xf) != Mantissa)
|
||||
return -1;
|
||||
|
||||
// We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
|
||||
if (Exp < -3 || Exp > 4)
|
||||
return -1;
|
||||
Exp = ((Exp+3) & 0x7) ^ 4;
|
||||
|
||||
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
|
||||
}
|
||||
|
||||
static inline int getFP32Imm(const APFloat &FPImm) {
|
||||
return getFP32Imm(FPImm.bitcastToAPInt());
|
||||
}
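// Illustrative sketch (not part of the imported header): only values of the
// form +/-(16..31)/16 * 2^(-3..4) fit the 8-bit encoding, so 2.0f survives a
// round trip through the helpers above while 0.1f yields -1:
//   int Enc = ARM64_AM::getFP32Imm(APFloat(2.0f));
//   assert(Enc != -1 && ARM64_AM::getFPImmFloat(Enc) == 2.0f);
//   assert(ARM64_AM::getFP32Imm(APFloat(0.1f)) == -1);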
|
||||
|
||||
/// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
|
||||
/// floating-point value. If the value cannot be represented as an 8-bit
|
||||
/// floating-point value, then return -1.
|
||||
static inline int getFP64Imm(const APInt &Imm) {
|
||||
uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
|
||||
int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
|
||||
uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
|
||||
|
||||
// We can handle 4 bits of mantissa.
|
||||
// mantissa = (16+UInt(e:f:g:h))/16.
|
||||
if (Mantissa & 0xffffffffffffULL)
|
||||
return -1;
|
||||
Mantissa >>= 48;
|
||||
if ((Mantissa & 0xf) != Mantissa)
|
||||
return -1;
|
||||
|
||||
// We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
|
||||
if (Exp < -3 || Exp > 4)
|
||||
return -1;
|
||||
Exp = ((Exp+3) & 0x7) ^ 4;
|
||||
|
||||
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
|
||||
}
|
||||
|
||||
static inline int getFP64Imm(const APFloat &FPImm) {
|
||||
return getFP64Imm(FPImm.bitcastToAPInt());
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// AdvSIMD Modified Immediates
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
// 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh
|
||||
static inline bool isAdvSIMDModImmType1(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
((Imm & 0xffffff00ffffff00ULL) == 0);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType1(uint64_t Imm) {
|
||||
return (Imm & 0xffULL);
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType1(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 32) | EncVal;
|
||||
}
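// Illustrative sketch (not part of the imported header): Type1 is one byte
// replicated into the low byte of each 32-bit half, e.g.:
//   assert(ARM64_AM::isAdvSIMDModImmType1(0x000000ab000000abULL));
//   assert(ARM64_AM::encodeAdvSIMDModImmType1(0x000000ab000000abULL) == 0xab);
//   assert(ARM64_AM::decodeAdvSIMDModImmType1(0xab) == 0x000000ab000000abULL);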
|
||||
|
||||
// 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00
|
||||
static inline bool isAdvSIMDModImmType2(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
((Imm & 0xffff00ffffff00ffULL) == 0);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType2(uint64_t Imm) {
|
||||
return (Imm & 0xff00ULL) >> 8;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType2(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 40) | (EncVal << 8);
|
||||
}
|
||||
|
||||
// 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00
|
||||
static inline bool isAdvSIMDModImmType3(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
((Imm & 0xff00ffffff00ffffULL) == 0);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType3(uint64_t Imm) {
|
||||
return (Imm & 0xff0000ULL) >> 16;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType3(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 48) | (EncVal << 16);
|
||||
}
|
||||
|
||||
// abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00
|
||||
static inline bool isAdvSIMDModImmType4(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
((Imm & 0x00ffffff00ffffffULL) == 0);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType4(uint64_t Imm) {
|
||||
return (Imm & 0xff000000ULL) >> 24;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType4(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 56) | (EncVal << 24);
|
||||
}
|
||||
|
||||
// 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh
|
||||
static inline bool isAdvSIMDModImmType5(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
(((Imm & 0x00ff0000ULL) >> 16) == (Imm & 0x000000ffULL)) &&
|
||||
((Imm & 0xff00ff00ff00ff00ULL) == 0);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType5(uint64_t Imm) {
|
||||
return (Imm & 0xffULL);
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType5(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 48) | (EncVal << 32) | (EncVal << 16) | EncVal;
|
||||
}
|
||||
|
||||
// abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00
|
||||
static inline bool isAdvSIMDModImmType6(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
(((Imm & 0xff000000ULL) >> 16) == (Imm & 0x0000ff00ULL)) &&
|
||||
((Imm & 0x00ff00ff00ff00ffULL) == 0);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType6(uint64_t Imm) {
|
||||
return (Imm & 0xff00ULL) >> 8;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType6(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 56) | (EncVal << 40) | (EncVal << 24) | (EncVal << 8);
|
||||
}
|
||||
|
||||
// 0x00 0x00 abcdefgh 0xFF 0x00 0x00 abcdefgh 0xFF
|
||||
static inline bool isAdvSIMDModImmType7(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
((Imm & 0xffff00ffffff00ffULL) == 0x000000ff000000ffULL);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType7(uint64_t Imm) {
|
||||
return (Imm & 0xff00ULL) >> 8;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType7(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 40) | (EncVal << 8) | 0x000000ff000000ffULL;
|
||||
}
|
||||
|
||||
// 0x00 abcdefgh 0xFF 0xFF 0x00 abcdefgh 0xFF 0xFF
|
||||
static inline bool isAdvSIMDModImmType8(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
((Imm & 0xff00ffffff00ffffULL) == 0x0000ffff0000ffffULL);
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType8(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
return (EncVal << 48) | (EncVal << 16) | 0x0000ffff0000ffffULL;
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType8(uint64_t Imm) {
|
||||
return (Imm & 0x00ff0000ULL) >> 16;
|
||||
}
|
||||
|
||||
// abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh
|
||||
static inline bool isAdvSIMDModImmType9(uint64_t Imm) {
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
((Imm >> 48) == (Imm & 0x0000ffffULL)) &&
|
||||
((Imm >> 56) == (Imm & 0x000000ffULL));
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType9(uint64_t Imm) {
|
||||
return (Imm & 0xffULL);
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType9(uint8_t Imm) {
|
||||
uint64_t EncVal = Imm;
|
||||
EncVal |= (EncVal << 8);
|
||||
EncVal |= (EncVal << 16);
|
||||
EncVal |= (EncVal << 32);
|
||||
return EncVal;
|
||||
}
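// Illustrative sketch (not part of the imported header): Type9 is a single
// byte replicated across all eight byte lanes, e.g.:
//   assert(ARM64_AM::isAdvSIMDModImmType9(0x4242424242424242ULL));
//   assert(ARM64_AM::decodeAdvSIMDModImmType9(
//              ARM64_AM::encodeAdvSIMDModImmType9(0x4242424242424242ULL)) ==
//          0x4242424242424242ULL);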
|
||||
|
||||
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
|
||||
// cmode: 1110, op: 1
|
||||
static inline bool isAdvSIMDModImmType10(uint64_t Imm) {
|
||||
uint64_t ByteA = Imm & 0xff00000000000000ULL;
|
||||
uint64_t ByteB = Imm & 0x00ff000000000000ULL;
|
||||
uint64_t ByteC = Imm & 0x0000ff0000000000ULL;
|
||||
uint64_t ByteD = Imm & 0x000000ff00000000ULL;
|
||||
uint64_t ByteE = Imm & 0x00000000ff000000ULL;
|
||||
uint64_t ByteF = Imm & 0x0000000000ff0000ULL;
|
||||
uint64_t ByteG = Imm & 0x000000000000ff00ULL;
|
||||
uint64_t ByteH = Imm & 0x00000000000000ffULL;
|
||||
|
||||
return (ByteA == 0ULL || ByteA == 0xff00000000000000ULL) &&
|
||||
(ByteB == 0ULL || ByteB == 0x00ff000000000000ULL) &&
|
||||
(ByteC == 0ULL || ByteC == 0x0000ff0000000000ULL) &&
|
||||
(ByteD == 0ULL || ByteD == 0x000000ff00000000ULL) &&
|
||||
(ByteE == 0ULL || ByteE == 0x00000000ff000000ULL) &&
|
||||
(ByteF == 0ULL || ByteF == 0x0000000000ff0000ULL) &&
|
||||
(ByteG == 0ULL || ByteG == 0x000000000000ff00ULL) &&
|
||||
(ByteH == 0ULL || ByteH == 0x00000000000000ffULL);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType10(uint64_t Imm) {
|
||||
bool BitA = Imm & 0xff00000000000000ULL;
|
||||
bool BitB = Imm & 0x00ff000000000000ULL;
|
||||
bool BitC = Imm & 0x0000ff0000000000ULL;
|
||||
bool BitD = Imm & 0x000000ff00000000ULL;
|
||||
bool BitE = Imm & 0x00000000ff000000ULL;
|
||||
bool BitF = Imm & 0x0000000000ff0000ULL;
|
||||
bool BitG = Imm & 0x000000000000ff00ULL;
|
||||
bool BitH = Imm & 0x00000000000000ffULL;
|
||||
|
||||
unsigned EncVal = BitA;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitB;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitC;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitD;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitE;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitF;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitG;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitH;
|
||||
return EncVal;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType10(uint8_t Imm) {
|
||||
uint64_t EncVal = 0;
|
||||
if (Imm & 0x80) EncVal |= 0xff00000000000000ULL;
|
||||
if (Imm & 0x40) EncVal |= 0x00ff000000000000ULL;
|
||||
if (Imm & 0x20) EncVal |= 0x0000ff0000000000ULL;
|
||||
if (Imm & 0x10) EncVal |= 0x000000ff00000000ULL;
|
||||
if (Imm & 0x08) EncVal |= 0x00000000ff000000ULL;
|
||||
if (Imm & 0x04) EncVal |= 0x0000000000ff0000ULL;
|
||||
if (Imm & 0x02) EncVal |= 0x000000000000ff00ULL;
|
||||
if (Imm & 0x01) EncVal |= 0x00000000000000ffULL;
|
||||
return EncVal;
|
||||
}
|
||||
|
||||
// aBbbbbbc defgh000 0x00 0x00 aBbbbbbc defgh000 0x00 0x00
|
||||
static inline bool isAdvSIMDModImmType11(uint64_t Imm) {
|
||||
uint64_t BString = (Imm & 0x7E000000ULL) >> 25;
|
||||
return ((Imm >> 32) == (Imm & 0xffffffffULL)) &&
|
||||
(BString == 0x1f || BString == 0x20) &&
|
||||
((Imm & 0x0007ffff0007ffffULL) == 0);
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType11(uint64_t Imm) {
|
||||
bool BitA = (Imm & 0x80000000ULL);
|
||||
bool BitB = (Imm & 0x20000000ULL);
|
||||
bool BitC = (Imm & 0x01000000ULL);
|
||||
bool BitD = (Imm & 0x00800000ULL);
|
||||
bool BitE = (Imm & 0x00400000ULL);
|
||||
bool BitF = (Imm & 0x00200000ULL);
|
||||
bool BitG = (Imm & 0x00100000ULL);
|
||||
bool BitH = (Imm & 0x00080000ULL);
|
||||
|
||||
unsigned EncVal = BitA;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitB;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitC;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitD;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitE;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitF;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitG;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitH;
|
||||
return EncVal;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType11(uint8_t Imm) {
|
||||
uint64_t EncVal = 0;
|
||||
if (Imm & 0x80) EncVal |= 0x80000000ULL;
|
||||
if (Imm & 0x40) EncVal |= 0x3e000000ULL;
|
||||
else EncVal |= 0x40000000ULL;
|
||||
if (Imm & 0x20) EncVal |= 0x01000000ULL;
|
||||
if (Imm & 0x10) EncVal |= 0x00800000ULL;
|
||||
if (Imm & 0x08) EncVal |= 0x00400000ULL;
|
||||
if (Imm & 0x04) EncVal |= 0x00200000ULL;
|
||||
if (Imm & 0x02) EncVal |= 0x00100000ULL;
|
||||
if (Imm & 0x01) EncVal |= 0x00080000ULL;
|
||||
return (EncVal << 32) | EncVal;
|
||||
}
|
||||
|
||||
// aBbbbbbb bbcdefgh 0x00 0x00 0x00 0x00 0x00 0x00
|
||||
static inline bool isAdvSIMDModImmType12(uint64_t Imm) {
|
||||
uint64_t BString = (Imm & 0x7fc0000000000000ULL) >> 54;
|
||||
return ((BString == 0xff || BString == 0x100) &&
|
||||
((Imm & 0x0000ffffffffffffULL) == 0));
|
||||
}
|
||||
|
||||
static inline uint8_t encodeAdvSIMDModImmType12(uint64_t Imm) {
|
||||
bool BitA = (Imm & 0x8000000000000000ULL);
|
||||
bool BitB = (Imm & 0x0040000000000000ULL);
|
||||
bool BitC = (Imm & 0x0020000000000000ULL);
|
||||
bool BitD = (Imm & 0x0010000000000000ULL);
|
||||
bool BitE = (Imm & 0x0008000000000000ULL);
|
||||
bool BitF = (Imm & 0x0004000000000000ULL);
|
||||
bool BitG = (Imm & 0x0002000000000000ULL);
|
||||
bool BitH = (Imm & 0x0001000000000000ULL);
|
||||
|
||||
unsigned EncVal = BitA;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitB;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitC;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitD;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitE;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitF;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitG;
|
||||
EncVal <<= 1;
|
||||
EncVal |= BitH;
|
||||
return EncVal;
|
||||
}
|
||||
|
||||
static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) {
|
||||
uint64_t EncVal = 0;
|
||||
if (Imm & 0x80) EncVal |= 0x8000000000000000ULL;
|
||||
if (Imm & 0x40) EncVal |= 0x3fc0000000000000ULL;
|
||||
else EncVal |= 0x4000000000000000ULL;
|
||||
if (Imm & 0x20) EncVal |= 0x0020000000000000ULL;
|
||||
if (Imm & 0x10) EncVal |= 0x0010000000000000ULL;
|
||||
if (Imm & 0x08) EncVal |= 0x0008000000000000ULL;
|
||||
if (Imm & 0x04) EncVal |= 0x0004000000000000ULL;
|
||||
if (Imm & 0x02) EncVal |= 0x0002000000000000ULL;
|
||||
if (Imm & 0x01) EncVal |= 0x0001000000000000ULL;
|
||||
return (EncVal << 32) | EncVal;
|
||||
}
|
||||
|
||||
} // end namespace ARM64_AM
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
533  lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp (new file)
@ -0,0 +1,533 @@
|
||||
//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM64.h"
|
||||
#include "ARM64RegisterInfo.h"
|
||||
#include "MCTargetDesc/ARM64FixupKinds.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/MC/MCAsmBackend.h"
|
||||
#include "llvm/MC/MCDirectives.h"
|
||||
#include "llvm/MC/MCFixupKindInfo.h"
|
||||
#include "llvm/MC/MCObjectWriter.h"
|
||||
#include "llvm/MC/MCSectionMachO.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/MachO.h"
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class ARM64AsmBackend : public MCAsmBackend {
|
||||
static const unsigned PCRelFlagVal =
|
||||
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel;
|
||||
|
||||
public:
|
||||
ARM64AsmBackend(const Target &T) : MCAsmBackend() {}
|
||||
|
||||
unsigned getNumFixupKinds() const { return ARM64::NumTargetFixupKinds; }
|
||||
|
||||
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
|
||||
const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = {
|
||||
// This table *must* be in the order that the fixup_* kinds are defined in
|
||||
// ARM64FixupKinds.h.
|
||||
//
|
||||
// Name Offset (bits) Size (bits) Flags
|
||||
{ "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal },
|
||||
{ "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal },
|
||||
{ "fixup_arm64_add_imm12", 10, 12, 0 },
|
||||
{ "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 },
|
||||
{ "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 },
|
||||
{ "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 },
|
||||
{ "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 },
|
||||
{ "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 },
|
||||
{ "fixup_arm64_movw", 5, 16, 0 },
|
||||
{ "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal },
|
||||
{ "fixup_arm64_pcrel_imm19", 5, 19, PCRelFlagVal },
|
||||
{ "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal },
|
||||
{ "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal },
|
||||
{ "fixup_arm64_tlsdesc_call", 0, 0, 0 }
|
||||
};
|
||||
|
||||
if (Kind < FirstTargetFixupKind)
|
||||
return MCAsmBackend::getFixupKindInfo(Kind);
|
||||
|
||||
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
|
||||
"Invalid kind!");
|
||||
return Infos[Kind - FirstTargetFixupKind];
|
||||
}
|
||||
|
||||
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
|
||||
uint64_t Value, bool IsPCRel) const;
|
||||
|
||||
bool mayNeedRelaxation(const MCInst &Inst) const;
|
||||
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
|
||||
const MCRelaxableFragment *DF,
|
||||
const MCAsmLayout &Layout) const;
|
||||
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
|
||||
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
|
||||
|
||||
void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
|
||||
|
||||
unsigned getPointerSize() const { return 8; }
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// \brief The number of bytes the fixup may change.
|
||||
static unsigned getFixupKindNumBytes(unsigned Kind) {
|
||||
switch (Kind) {
|
||||
default:
|
||||
assert(0 && "Unknown fixup kind!");
|
||||
|
||||
case ARM64::fixup_arm64_tlsdesc_call:
|
||||
return 0;
|
||||
|
||||
case FK_Data_1:
|
||||
return 1;
|
||||
|
||||
case FK_Data_2:
|
||||
case ARM64::fixup_arm64_movw:
|
||||
return 2;
|
||||
|
||||
case ARM64::fixup_arm64_pcrel_branch14:
|
||||
case ARM64::fixup_arm64_add_imm12:
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale1:
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale2:
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale4:
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale8:
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale16:
|
||||
case ARM64::fixup_arm64_pcrel_imm19:
|
||||
return 3;
|
||||
|
||||
case ARM64::fixup_arm64_pcrel_adr_imm21:
|
||||
case ARM64::fixup_arm64_pcrel_adrp_imm21:
|
||||
case ARM64::fixup_arm64_pcrel_branch26:
|
||||
case ARM64::fixup_arm64_pcrel_call26:
|
||||
case FK_Data_4:
|
||||
return 4;
|
||||
|
||||
case FK_Data_8:
|
||||
return 8;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned AdrImmBits(unsigned Value) {
|
||||
unsigned lo2 = Value & 0x3;
|
||||
unsigned hi19 = (Value & 0x1ffffc) >> 2;
|
||||
return (hi19 << 5) | (lo2 << 29);
|
||||
}
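// Illustrative example of the split above: for a byte offset Value = 0x12345,
// lo2 = 0x12345 & 0x3 = 0x1 lands in bits 30:29 and
// hi19 = (0x12345 & 0x1ffffc) >> 2 = 0x48d1 lands in bits 23:5, matching the
// ADR/ADRP immlo/immhi field layout.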
static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
|
||||
int64_t SignedValue = static_cast<int64_t>(Value);
|
||||
switch (Kind) {
|
||||
default:
|
||||
assert(false && "Unknown fixup kind!");
|
||||
case ARM64::fixup_arm64_pcrel_adr_imm21:
|
||||
if (SignedValue > 2097151 || SignedValue < -2097152)
|
||||
report_fatal_error("fixup value out of range");
|
||||
return AdrImmBits(Value & 0x1fffffULL);
|
||||
case ARM64::fixup_arm64_pcrel_adrp_imm21:
|
||||
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
|
||||
case ARM64::fixup_arm64_pcrel_imm19:
|
||||
// Signed 21-bit immediate
|
||||
if (SignedValue > 2097151 || SignedValue < -2097152)
|
||||
report_fatal_error("fixup value out of range");
|
||||
// Low two bits are not encoded.
|
||||
return (Value >> 2) & 0x7ffff;
|
||||
case ARM64::fixup_arm64_add_imm12:
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale1:
|
||||
// Unsigned 12-bit immediate
|
||||
if (Value >= 0x1000)
|
||||
report_fatal_error("invalid imm12 fixup value");
|
||||
return Value;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale2:
|
||||
// Unsigned 12-bit immediate which gets multiplied by 2
|
||||
if (Value & 1 || Value >= 0x2000)
|
||||
report_fatal_error("invalid imm12 fixup value");
|
||||
return Value >> 1;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale4:
|
||||
// Unsigned 12-bit immediate which gets multiplied by 4
|
||||
if (Value & 3 || Value >= 0x4000)
|
||||
report_fatal_error("invalid imm12 fixup value");
|
||||
return Value >> 2;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale8:
|
||||
// Unsigned 12-bit immediate which gets multiplied by 8
|
||||
if (Value & 7 || Value >= 0x8000)
|
||||
report_fatal_error("invalid imm12 fixup value");
|
||||
return Value >> 3;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale16:
|
||||
// Unsigned 12-bit immediate which gets multiplied by 16
|
||||
if (Value & 15 || Value >= 0x10000)
|
||||
report_fatal_error("invalid imm12 fixup value");
|
||||
return Value >> 4;
|
||||
case ARM64::fixup_arm64_movw:
|
||||
report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet");
|
||||
return Value;
|
||||
case ARM64::fixup_arm64_pcrel_branch14:
|
||||
// Signed 16-bit immediate
|
||||
if (SignedValue > 32767 || SignedValue < -32768)
|
||||
report_fatal_error("fixup value out of range");
|
||||
// Low two bits are not encoded (4-byte alignment assumed).
|
||||
if (Value & 0x3)
|
||||
report_fatal_error("fixup not sufficiently aligned");
|
||||
return (Value >> 2) & 0x3fff;
|
||||
case ARM64::fixup_arm64_pcrel_branch26:
|
||||
case ARM64::fixup_arm64_pcrel_call26:
|
||||
// Signed 28-bit immediate
|
||||
if (SignedValue > 134217727 || SignedValue < -134217728)
|
||||
report_fatal_error("fixup value out of range");
|
||||
// Low two bits are not encoded (4-byte alignment assumed).
|
||||
if (Value & 0x3)
|
||||
report_fatal_error("fixup not sufficiently aligned");
|
||||
return (Value >> 2) & 0x3ffffff;
|
||||
case FK_Data_1:
|
||||
case FK_Data_2:
|
||||
case FK_Data_4:
|
||||
case FK_Data_8:
|
||||
return Value;
|
||||
}
|
||||
}
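// For example, a resolved branch26 fixup of +0x1000 bytes passes the range and
// alignment checks above and encodes as 0x1000 >> 2 = 0x400 in the 26-bit field.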
void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
|
||||
unsigned DataSize, uint64_t Value,
|
||||
bool IsPCRel) const {
|
||||
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
|
||||
if (!Value)
|
||||
return; // Doesn't change encoding.
|
||||
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
|
||||
// Apply any target-specific value adjustments.
|
||||
Value = adjustFixupValue(Fixup.getKind(), Value);
|
||||
|
||||
// Shift the value into position.
|
||||
Value <<= Info.TargetOffset;
|
||||
|
||||
unsigned Offset = Fixup.getOffset();
|
||||
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
|
||||
|
||||
// For each byte of the fragment that the fixup touches, mask in the
|
||||
// bits from the fixup value.
|
||||
for (unsigned i = 0; i != NumBytes; ++i)
|
||||
Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
|
||||
}
|
||||
|
||||
bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
|
||||
const MCRelaxableFragment *DF,
|
||||
const MCAsmLayout &Layout) const {
|
||||
// FIXME: This isn't correct for ARM64. Just moving the "generic" logic
|
||||
// into the targets for now.
|
||||
//
|
||||
// Relax if the value is too big for a (signed) i8.
|
||||
return int64_t(Value) != int64_t(int8_t(Value));
|
||||
}
|
||||
|
||||
void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
|
||||
assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented");
|
||||
}
|
||||
|
||||
bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
|
||||
// If the count is not 4-byte aligned, we must be writing data into the text
|
||||
// section (otherwise we have unaligned instructions, and thus have far
|
||||
// bigger problems), so just write zeros instead.
|
||||
if ((Count & 3) != 0) {
|
||||
for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
|
||||
OW->Write8(0);
|
||||
}
|
||||
|
||||
// We are properly aligned, so write NOPs as requested.
|
||||
Count /= 4;
|
||||
for (uint64_t i = 0; i != Count; ++i)
|
||||
OW->Write32(0xd503201f);
|
||||
return true;
|
||||
}
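// For example, a request for 10 bytes of padding emits two zero bytes followed
// by two NOP instructions (0xd503201f), ten bytes in total.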
namespace {
|
||||
|
||||
namespace CU {
|
||||
|
||||
/// \brief Compact unwind encoding values.
|
||||
enum CompactUnwindEncodings {
|
||||
/// \brief A "frameless" leaf function, where no non-volatile registers are
|
||||
/// saved. The return remains in LR throughout the function.
|
||||
UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
|
||||
|
||||
/// \brief No compact unwind encoding available. Instead, the low 23 bits of
/// the compact unwind encoding are the offset of the DWARF FDE in the
/// __eh_frame section. This mode is never used in object files. It is only
/// generated by the linker in final linked images, which have only DWARF info
/// for a function.
|
||||
UNWIND_ARM64_MODE_DWARF = 0x03000000,
|
||||
|
||||
/// \brief This is a standard arm64 prologue where FP/LR are immediately
/// pushed on the stack, then SP is copied to FP. If there are any
/// non-volatile registers saved, they are copied into the stack frame in pairs
/// in a contiguous range right below the saved FP/LR pair. Any subset of the
/// five X pairs and four D pairs can be saved, but the memory layout must be
/// in register number order.
|
||||
UNWIND_ARM64_MODE_FRAME = 0x04000000,
|
||||
|
||||
/// \brief Frame register pair encodings.
|
||||
UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
|
||||
UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
|
||||
UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
|
||||
UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
|
||||
UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
|
||||
UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
|
||||
UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
|
||||
UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
|
||||
UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800
|
||||
};
|
||||
|
||||
} // end CU namespace
|
||||
|
||||
// FIXME: This should be in a separate file.
|
||||
class DarwinARM64AsmBackend : public ARM64AsmBackend {
|
||||
const MCRegisterInfo &MRI;
|
||||
|
||||
/// \brief Encode compact unwind stack adjustment for frameless functions.
|
||||
/// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
|
||||
/// The stack size always needs to be 16 byte aligned.
|
||||
uint32_t encodeStackAdjustment(uint32_t StackSize) const {
|
||||
return (StackSize / 16) << 12;
|
||||
}
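// For example, a 64-byte frameless stack adjustment encodes as
// (64 / 16) << 12 = 0x4000, which lands in the
// UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK bits.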
public:
|
||||
DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
|
||||
: ARM64AsmBackend(T), MRI(MRI) {}
|
||||
|
||||
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
|
||||
return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
|
||||
MachO::CPU_SUBTYPE_ARM64_ALL);
|
||||
}
|
||||
|
||||
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
|
||||
// Any section for which the linker breaks things into atoms needs to
|
||||
// preserve symbols, including assembler local symbols, to identify
|
||||
// those atoms. These sections are:
|
||||
// Sections of type:
|
||||
//
|
||||
// S_CSTRING_LITERALS (e.g. __cstring)
|
||||
// S_LITERAL_POINTERS (e.g. objc selector pointers)
|
||||
// S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
|
||||
//
|
||||
// Sections named:
|
||||
//
|
||||
// __TEXT,__eh_frame
|
||||
// __TEXT,__ustring
|
||||
// __DATA,__cfstring
|
||||
// __DATA,__objc_classrefs
|
||||
// __DATA,__objc_catlist
|
||||
//
|
||||
// FIXME: It would be better if the compiler used actual linker local
|
||||
// symbols for each of these sections rather than preserving what
|
||||
// are ostensibly assembler local symbols.
|
||||
const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
|
||||
return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
|
||||
SMO.getType() == MachO::S_4BYTE_LITERALS ||
|
||||
SMO.getType() == MachO::S_8BYTE_LITERALS ||
|
||||
SMO.getType() == MachO::S_16BYTE_LITERALS ||
|
||||
SMO.getType() == MachO::S_LITERAL_POINTERS ||
|
||||
(SMO.getSegmentName() == "__TEXT" &&
|
||||
(SMO.getSectionName() == "__eh_frame" ||
|
||||
SMO.getSectionName() == "__ustring")) ||
|
||||
(SMO.getSegmentName() == "__DATA" &&
|
||||
(SMO.getSectionName() == "__cfstring" ||
|
||||
SMO.getSectionName() == "__objc_classrefs" ||
|
||||
SMO.getSectionName() == "__objc_catlist")));
|
||||
}
|
||||
|
||||
/// \brief Generate the compact unwind encoding from the CFI directives.
|
||||
virtual uint32_t
|
||||
generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const
|
||||
override {
|
||||
if (Instrs.empty())
|
||||
return CU::UNWIND_ARM64_MODE_FRAMELESS;
|
||||
|
||||
bool HasFP = false;
|
||||
unsigned StackSize = 0;
|
||||
|
||||
uint32_t CompactUnwindEncoding = 0;
|
||||
for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
|
||||
const MCCFIInstruction &Inst = Instrs[i];
|
||||
|
||||
switch (Inst.getOperation()) {
|
||||
default:
|
||||
// Cannot handle this directive: bail out.
|
||||
return CU::UNWIND_ARM64_MODE_DWARF;
|
||||
case MCCFIInstruction::OpDefCfa: {
|
||||
// Defines a frame pointer.
|
||||
assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) ==
|
||||
ARM64::FP &&
|
||||
"Invalid frame pointer!");
|
||||
assert(i + 2 < e && "Insufficient CFI instructions to define a frame!");
|
||||
|
||||
const MCCFIInstruction &LRPush = Instrs[++i];
|
||||
assert(LRPush.getOperation() == MCCFIInstruction::OpOffset &&
|
||||
"Link register not pushed!");
|
||||
const MCCFIInstruction &FPPush = Instrs[++i];
|
||||
assert(FPPush.getOperation() == MCCFIInstruction::OpOffset &&
|
||||
"Frame pointer not pushed!");
|
||||
|
||||
unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true);
|
||||
unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true);
|
||||
|
||||
LRReg = getXRegFromWReg(LRReg);
|
||||
FPReg = getXRegFromWReg(FPReg);
|
||||
|
||||
assert(LRReg == ARM64::LR && FPReg == ARM64::FP &&
|
||||
"Pushing invalid registers for frame!");
|
||||
|
||||
// Indicate that the function has a frame.
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME;
|
||||
HasFP = true;
|
||||
break;
|
||||
}
|
||||
case MCCFIInstruction::OpDefCfaOffset: {
|
||||
assert(StackSize == 0 && "We already have the CFA offset!");
|
||||
StackSize = std::abs(Inst.getOffset());
|
||||
break;
|
||||
}
|
||||
case MCCFIInstruction::OpOffset: {
|
||||
// Registers are saved in pairs. We expect there to be two consecutive
|
||||
// `.cfi_offset' instructions with the appropriate registers specified.
|
||||
unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true);
|
||||
if (i + 1 == e)
|
||||
return CU::UNWIND_ARM64_MODE_DWARF;
|
||||
|
||||
const MCCFIInstruction &Inst2 = Instrs[++i];
|
||||
if (Inst2.getOperation() != MCCFIInstruction::OpOffset)
|
||||
return CU::UNWIND_ARM64_MODE_DWARF;
|
||||
unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true);
|
||||
|
||||
// N.B. The encodings must be in register number order, and the X
|
||||
// registers before the D registers.
|
||||
|
||||
// X19/X20 pair = 0x00000001,
|
||||
// X21/X22 pair = 0x00000002,
|
||||
// X23/X24 pair = 0x00000004,
|
||||
// X25/X26 pair = 0x00000008,
|
||||
// X27/X28 pair = 0x00000010
|
||||
Reg1 = getXRegFromWReg(Reg1);
|
||||
Reg2 = getXRegFromWReg(Reg2);
|
||||
|
||||
if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 &&
|
||||
(CompactUnwindEncoding & 0xF1E) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR;
|
||||
else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 &&
|
||||
(CompactUnwindEncoding & 0xF1C) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR;
|
||||
else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 &&
|
||||
(CompactUnwindEncoding & 0xF18) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR;
|
||||
else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 &&
|
||||
(CompactUnwindEncoding & 0xF10) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR;
|
||||
else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 &&
|
||||
(CompactUnwindEncoding & 0xF00) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR;
|
||||
else {
|
||||
Reg1 = getDRegFromBReg(Reg1);
|
||||
Reg2 = getDRegFromBReg(Reg2);
|
||||
|
||||
// D8/D9 pair = 0x00000100,
|
||||
// D10/D11 pair = 0x00000200,
|
||||
// D12/D13 pair = 0x00000400,
|
||||
// D14/D15 pair = 0x00000800
|
||||
if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 &&
|
||||
(CompactUnwindEncoding & 0xE00) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR;
|
||||
else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 &&
|
||||
(CompactUnwindEncoding & 0xC00) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR;
|
||||
else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 &&
|
||||
(CompactUnwindEncoding & 0x800) == 0)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR;
|
||||
else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15)
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR;
|
||||
else
|
||||
// A pair was pushed which we cannot handle.
|
||||
return CU::UNWIND_ARM64_MODE_DWARF;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!HasFP) {
|
||||
// With compact unwind info we can only represent stack adjustments of up
|
||||
// to 65520 bytes.
|
||||
if (StackSize > 65520)
|
||||
return CU::UNWIND_ARM64_MODE_DWARF;
|
||||
|
||||
CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS;
|
||||
CompactUnwindEncoding |= encodeStackAdjustment(StackSize);
|
||||
}
|
||||
|
||||
return CompactUnwindEncoding;
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
namespace {
|
||||
|
||||
class ELFARM64AsmBackend : public ARM64AsmBackend {
|
||||
public:
|
||||
uint8_t OSABI;
|
||||
|
||||
ELFARM64AsmBackend(const Target &T, uint8_t OSABI)
|
||||
: ARM64AsmBackend(T), OSABI(OSABI) {}
|
||||
|
||||
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
|
||||
return createARM64ELFObjectWriter(OS, OSABI);
|
||||
}
|
||||
|
||||
void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
|
||||
const MCFixup &Fixup, const MCFragment *DF,
|
||||
const MCValue &Target, uint64_t &Value,
|
||||
bool &IsResolved) override;
|
||||
};
|
||||
|
||||
void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm,
|
||||
const MCAsmLayout &Layout,
|
||||
const MCFixup &Fixup,
|
||||
const MCFragment *DF,
|
||||
const MCValue &Target,
|
||||
uint64_t &Value, bool &IsResolved) {
|
||||
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
|
||||
// ~0xfff. This means that the required offset to reach a symbol can vary by
|
||||
// up to one step depending on where the ADRP is in memory. For example:
|
||||
//
|
||||
// ADRP x0, there
|
||||
// there:
|
||||
//
|
||||
// If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
|
||||
// we'll need that as an offset. At any other address "there" will be in the
|
||||
// same page as the ADRP and the instruction should encode 0x0. Assuming the
|
||||
// section isn't 0x1000-aligned, we therefore need to delegate this decision
|
||||
// to the linker -- a relocation!
|
||||
if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21)
|
||||
IsResolved = false;
|
||||
}
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createARM64AsmBackend(const Target &T,
|
||||
const MCRegisterInfo &MRI,
|
||||
StringRef TT, StringRef CPU) {
|
||||
Triple TheTriple(TT);
|
||||
|
||||
if (TheTriple.isOSDarwin())
|
||||
return new DarwinARM64AsmBackend(T, MRI);
|
||||
|
||||
assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
|
||||
return new ELFARM64AsmBackend(T, TheTriple.getOS());
|
||||
}
|
998	lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h	Normal file
@ -0,0 +1,998 @@
//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains small standalone helper functions and enum definitions for
|
||||
// the ARM64 target useful for the compiler back-end and the MC libraries.
|
||||
// As such, it deliberately does not include references to LLVM core
|
||||
// code gen types, passes, etc..
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64BASEINFO_H
|
||||
#define ARM64BASEINFO_H
|
||||
|
||||
#include "ARM64MCTargetDesc.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
inline static unsigned getWRegFromXReg(unsigned Reg) {
|
||||
switch (Reg) {
|
||||
case ARM64::X0: return ARM64::W0;
|
||||
case ARM64::X1: return ARM64::W1;
|
||||
case ARM64::X2: return ARM64::W2;
|
||||
case ARM64::X3: return ARM64::W3;
|
||||
case ARM64::X4: return ARM64::W4;
|
||||
case ARM64::X5: return ARM64::W5;
|
||||
case ARM64::X6: return ARM64::W6;
|
||||
case ARM64::X7: return ARM64::W7;
|
||||
case ARM64::X8: return ARM64::W8;
|
||||
case ARM64::X9: return ARM64::W9;
|
||||
case ARM64::X10: return ARM64::W10;
|
||||
case ARM64::X11: return ARM64::W11;
|
||||
case ARM64::X12: return ARM64::W12;
|
||||
case ARM64::X13: return ARM64::W13;
|
||||
case ARM64::X14: return ARM64::W14;
|
||||
case ARM64::X15: return ARM64::W15;
|
||||
case ARM64::X16: return ARM64::W16;
|
||||
case ARM64::X17: return ARM64::W17;
|
||||
case ARM64::X18: return ARM64::W18;
|
||||
case ARM64::X19: return ARM64::W19;
|
||||
case ARM64::X20: return ARM64::W20;
|
||||
case ARM64::X21: return ARM64::W21;
|
||||
case ARM64::X22: return ARM64::W22;
|
||||
case ARM64::X23: return ARM64::W23;
|
||||
case ARM64::X24: return ARM64::W24;
|
||||
case ARM64::X25: return ARM64::W25;
|
||||
case ARM64::X26: return ARM64::W26;
|
||||
case ARM64::X27: return ARM64::W27;
|
||||
case ARM64::X28: return ARM64::W28;
|
||||
case ARM64::FP: return ARM64::W29;
|
||||
case ARM64::LR: return ARM64::W30;
|
||||
case ARM64::SP: return ARM64::WSP;
|
||||
case ARM64::XZR: return ARM64::WZR;
|
||||
}
|
||||
// For anything else, return it unchanged.
|
||||
return Reg;
|
||||
}
|
||||
|
||||
inline static unsigned getXRegFromWReg(unsigned Reg) {
|
||||
switch (Reg) {
|
||||
case ARM64::W0: return ARM64::X0;
|
||||
case ARM64::W1: return ARM64::X1;
|
||||
case ARM64::W2: return ARM64::X2;
|
||||
case ARM64::W3: return ARM64::X3;
|
||||
case ARM64::W4: return ARM64::X4;
|
||||
case ARM64::W5: return ARM64::X5;
|
||||
case ARM64::W6: return ARM64::X6;
|
||||
case ARM64::W7: return ARM64::X7;
|
||||
case ARM64::W8: return ARM64::X8;
|
||||
case ARM64::W9: return ARM64::X9;
|
||||
case ARM64::W10: return ARM64::X10;
|
||||
case ARM64::W11: return ARM64::X11;
|
||||
case ARM64::W12: return ARM64::X12;
|
||||
case ARM64::W13: return ARM64::X13;
|
||||
case ARM64::W14: return ARM64::X14;
|
||||
case ARM64::W15: return ARM64::X15;
|
||||
case ARM64::W16: return ARM64::X16;
|
||||
case ARM64::W17: return ARM64::X17;
|
||||
case ARM64::W18: return ARM64::X18;
|
||||
case ARM64::W19: return ARM64::X19;
|
||||
case ARM64::W20: return ARM64::X20;
|
||||
case ARM64::W21: return ARM64::X21;
|
||||
case ARM64::W22: return ARM64::X22;
|
||||
case ARM64::W23: return ARM64::X23;
|
||||
case ARM64::W24: return ARM64::X24;
|
||||
case ARM64::W25: return ARM64::X25;
|
||||
case ARM64::W26: return ARM64::X26;
|
||||
case ARM64::W27: return ARM64::X27;
|
||||
case ARM64::W28: return ARM64::X28;
|
||||
case ARM64::W29: return ARM64::FP;
|
||||
case ARM64::W30: return ARM64::LR;
|
||||
case ARM64::WSP: return ARM64::SP;
|
||||
case ARM64::WZR: return ARM64::XZR;
|
||||
}
|
||||
// For anything else, return it unchanged.
|
||||
return Reg;
|
||||
}
|
||||
|
||||
static inline unsigned getBRegFromDReg(unsigned Reg) {
|
||||
switch (Reg) {
|
||||
case ARM64::D0: return ARM64::B0;
|
||||
case ARM64::D1: return ARM64::B1;
|
||||
case ARM64::D2: return ARM64::B2;
|
||||
case ARM64::D3: return ARM64::B3;
|
||||
case ARM64::D4: return ARM64::B4;
|
||||
case ARM64::D5: return ARM64::B5;
|
||||
case ARM64::D6: return ARM64::B6;
|
||||
case ARM64::D7: return ARM64::B7;
|
||||
case ARM64::D8: return ARM64::B8;
|
||||
case ARM64::D9: return ARM64::B9;
|
||||
case ARM64::D10: return ARM64::B10;
|
||||
case ARM64::D11: return ARM64::B11;
|
||||
case ARM64::D12: return ARM64::B12;
|
||||
case ARM64::D13: return ARM64::B13;
|
||||
case ARM64::D14: return ARM64::B14;
|
||||
case ARM64::D15: return ARM64::B15;
|
||||
case ARM64::D16: return ARM64::B16;
|
||||
case ARM64::D17: return ARM64::B17;
|
||||
case ARM64::D18: return ARM64::B18;
|
||||
case ARM64::D19: return ARM64::B19;
|
||||
case ARM64::D20: return ARM64::B20;
|
||||
case ARM64::D21: return ARM64::B21;
|
||||
case ARM64::D22: return ARM64::B22;
|
||||
case ARM64::D23: return ARM64::B23;
|
||||
case ARM64::D24: return ARM64::B24;
|
||||
case ARM64::D25: return ARM64::B25;
|
||||
case ARM64::D26: return ARM64::B26;
|
||||
case ARM64::D27: return ARM64::B27;
|
||||
case ARM64::D28: return ARM64::B28;
|
||||
case ARM64::D29: return ARM64::B29;
|
||||
case ARM64::D30: return ARM64::B30;
|
||||
case ARM64::D31: return ARM64::B31;
|
||||
}
|
||||
// For anything else, return it unchanged.
|
||||
return Reg;
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned getDRegFromBReg(unsigned Reg) {
|
||||
switch (Reg) {
|
||||
case ARM64::B0: return ARM64::D0;
|
||||
case ARM64::B1: return ARM64::D1;
|
||||
case ARM64::B2: return ARM64::D2;
|
||||
case ARM64::B3: return ARM64::D3;
|
||||
case ARM64::B4: return ARM64::D4;
|
||||
case ARM64::B5: return ARM64::D5;
|
||||
case ARM64::B6: return ARM64::D6;
|
||||
case ARM64::B7: return ARM64::D7;
|
||||
case ARM64::B8: return ARM64::D8;
|
||||
case ARM64::B9: return ARM64::D9;
|
||||
case ARM64::B10: return ARM64::D10;
|
||||
case ARM64::B11: return ARM64::D11;
|
||||
case ARM64::B12: return ARM64::D12;
|
||||
case ARM64::B13: return ARM64::D13;
|
||||
case ARM64::B14: return ARM64::D14;
|
||||
case ARM64::B15: return ARM64::D15;
|
||||
case ARM64::B16: return ARM64::D16;
|
||||
case ARM64::B17: return ARM64::D17;
|
||||
case ARM64::B18: return ARM64::D18;
|
||||
case ARM64::B19: return ARM64::D19;
|
||||
case ARM64::B20: return ARM64::D20;
|
||||
case ARM64::B21: return ARM64::D21;
|
||||
case ARM64::B22: return ARM64::D22;
|
||||
case ARM64::B23: return ARM64::D23;
|
||||
case ARM64::B24: return ARM64::D24;
|
||||
case ARM64::B25: return ARM64::D25;
|
||||
case ARM64::B26: return ARM64::D26;
|
||||
case ARM64::B27: return ARM64::D27;
|
||||
case ARM64::B28: return ARM64::D28;
|
||||
case ARM64::B29: return ARM64::D29;
|
||||
case ARM64::B30: return ARM64::D30;
|
||||
case ARM64::B31: return ARM64::D31;
|
||||
}
|
||||
// For anything else, return it unchanged.
|
||||
return Reg;
|
||||
}
|
||||
|
||||
namespace ARM64CC {
|
||||
|
||||
// The CondCodes constants map directly to the 4-bit encoding of the condition
|
||||
// field for predicated instructions.
|
||||
enum CondCode { // Meaning (integer) Meaning (floating-point)
|
||||
EQ = 0x0, // Equal Equal
|
||||
NE = 0x1, // Not equal Not equal, or unordered
|
||||
CS = 0x2, // Carry set >, ==, or unordered
|
||||
CC = 0x3, // Carry clear Less than
|
||||
MI = 0x4, // Minus, negative Less than
|
||||
PL = 0x5, // Plus, positive or zero >, ==, or unordered
|
||||
VS = 0x6, // Overflow Unordered
|
||||
VC = 0x7, // No overflow Not unordered
|
||||
HI = 0x8, // Unsigned higher Greater than, or unordered
|
||||
LS = 0x9, // Unsigned lower or same Less than or equal
|
||||
GE = 0xa, // Greater than or equal Greater than or equal
|
||||
LT = 0xb, // Less than Less than, or unordered
|
||||
GT = 0xc, // Greater than Greater than
|
||||
LE = 0xd, // Less than or equal <, ==, or unordered
|
||||
AL = 0xe // Always (unconditional) Always (unconditional)
|
||||
};
|
||||
|
||||
inline static const char *getCondCodeName(CondCode Code) {
|
||||
// cond<0> is ignored when cond<3:1> = 111, where 1110 is 0xe (aka AL).
|
||||
if ((Code & AL) == AL)
|
||||
Code = AL;
|
||||
switch (Code) {
|
||||
case EQ: return "eq";
|
||||
case NE: return "ne";
|
||||
case CS: return "cs";
|
||||
case CC: return "cc";
|
||||
case MI: return "mi";
|
||||
case PL: return "pl";
|
||||
case VS: return "vs";
|
||||
case VC: return "vc";
|
||||
case HI: return "hi";
|
||||
case LS: return "ls";
|
||||
case GE: return "ge";
|
||||
case LT: return "lt";
|
||||
case GT: return "gt";
|
||||
case LE: return "le";
|
||||
case AL: return "al";
|
||||
}
|
||||
llvm_unreachable("Unknown condition code");
|
||||
}
|
||||
|
||||
inline static CondCode getInvertedCondCode(CondCode Code) {
|
||||
switch (Code) {
|
||||
default: llvm_unreachable("Unknown condition code");
|
||||
case EQ: return NE;
|
||||
case NE: return EQ;
|
||||
case CS: return CC;
|
||||
case CC: return CS;
|
||||
case MI: return PL;
|
||||
case PL: return MI;
|
||||
case VS: return VC;
|
||||
case VC: return VS;
|
||||
case HI: return LS;
|
||||
case LS: return HI;
|
||||
case GE: return LT;
|
||||
case LT: return GE;
|
||||
case GT: return LE;
|
||||
case LE: return GT;
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a condition code, return NZCV flags that would satisfy that condition.
|
||||
/// The flag bits are in the format expected by the ccmp instructions.
|
||||
/// Note that many different flag settings can satisfy a given condition code;
/// this function just returns one of them.
|
||||
inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
|
||||
// NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7.
|
||||
enum { N = 8, Z = 4, C = 2, V = 1 };
|
||||
switch (Code) {
|
||||
default: llvm_unreachable("Unknown condition code");
|
||||
case EQ: return Z; // Z == 1
|
||||
case NE: return 0; // Z == 0
|
||||
case CS: return C; // C == 1
|
||||
case CC: return 0; // C == 0
|
||||
case MI: return N; // N == 1
|
||||
case PL: return 0; // N == 0
|
||||
case VS: return V; // V == 1
|
||||
case VC: return 0; // V == 0
|
||||
case HI: return C; // C == 1 && Z == 0
|
||||
case LS: return 0; // C == 0 || Z == 1
|
||||
case GE: return 0; // N == V
|
||||
case LT: return N; // N != V
|
||||
case GT: return 0; // Z == 0 && N == V
|
||||
case LE: return Z; // Z == 1 || N != V
|
||||
}
|
||||
}
|
||||
} // end namespace ARM64CC
|
||||
|
||||
namespace ARM64SYS {
|
||||
enum BarrierOption {
|
||||
InvalidBarrier = 0xff,
|
||||
OSHLD = 0x1,
|
||||
OSHST = 0x2,
|
||||
OSH = 0x3,
|
||||
NSHLD = 0x5,
|
||||
NSHST = 0x6,
|
||||
NSH = 0x7,
|
||||
ISHLD = 0x9,
|
||||
ISHST = 0xa,
|
||||
ISH = 0xb,
|
||||
LD = 0xd,
|
||||
ST = 0xe,
|
||||
SY = 0xf
|
||||
};
|
||||
|
||||
inline static const char *getBarrierOptName(BarrierOption Opt) {
|
||||
switch (Opt) {
|
||||
default: return NULL;
|
||||
case 0x1: return "oshld";
|
||||
case 0x2: return "oshst";
|
||||
case 0x3: return "osh";
|
||||
case 0x5: return "nshld";
|
||||
case 0x6: return "nshst";
|
||||
case 0x7: return "nsh";
|
||||
case 0x9: return "ishld";
|
||||
case 0xa: return "ishst";
|
||||
case 0xb: return "ish";
|
||||
case 0xd: return "ld";
|
||||
case 0xe: return "st";
|
||||
case 0xf: return "sy";
|
||||
}
|
||||
}
|
||||
|
||||
#define A64_SYSREG_ENC(op0,CRn,op2,CRm,op1) ((op0) << 14 | (op1) << 11 | \
|
||||
(CRn) << 7 | (CRm) << 3 | (op2))
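// For example, A64_SYSREG_ENC(2, 0, 0, 1, 3), used for MDCCSR_EL0 below,
// expands to (2 << 14) | (3 << 11) | (0 << 7) | (1 << 3) | 0 = 0x9808.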
enum SystemRegister {
|
||||
InvalidSystemReg = 0,
|
||||
// Table in section 3.10.3
|
||||
SPSR_EL1 = 0xc200,
|
||||
SPSR_svc = SPSR_EL1,
|
||||
ELR_EL1 = 0xc201,
|
||||
SP_EL0 = 0xc208,
|
||||
SPSel = 0xc210,
|
||||
CurrentEL = 0xc212,
|
||||
DAIF = 0xda11,
|
||||
NZCV = 0xda10,
|
||||
FPCR = 0xda20,
|
||||
FPSR = 0xda21,
|
||||
DSPSR = 0xda28,
|
||||
DLR = 0xda29,
|
||||
SPSR_EL2 = 0xe200,
|
||||
SPSR_hyp = SPSR_EL2,
|
||||
ELR_EL2 = 0xe201,
|
||||
SP_EL1 = 0xe208,
|
||||
SPSR_irq = 0xe218,
|
||||
SPSR_abt = 0xe219,
|
||||
SPSR_und = 0xe21a,
|
||||
SPSR_fiq = 0xe21b,
|
||||
SPSR_EL3 = 0xf200,
|
||||
ELR_EL3 = 0xf201,
|
||||
SP_EL2 = 0xf208,
|
||||
|
||||
|
||||
// Table in section 3.10.8
|
||||
MIDR_EL1 = 0xc000,
|
||||
CTR_EL0 = 0xd801,
|
||||
MPIDR_EL1 = 0xc005,
|
||||
ECOIDR_EL1 = 0xc006,
|
||||
DCZID_EL0 = 0xd807,
|
||||
MVFR0_EL1 = 0xc018,
|
||||
MVFR1_EL1 = 0xc019,
|
||||
ID_AA64PFR0_EL1 = 0xc020,
|
||||
ID_AA64PFR1_EL1 = 0xc021,
|
||||
ID_AA64DFR0_EL1 = 0xc028,
|
||||
ID_AA64DFR1_EL1 = 0xc029,
|
||||
ID_AA64ISAR0_EL1 = 0xc030,
|
||||
ID_AA64ISAR1_EL1 = 0xc031,
|
||||
ID_AA64MMFR0_EL1 = 0xc038,
|
||||
ID_AA64MMFR1_EL1 = 0xc039,
|
||||
CCSIDR_EL1 = 0xc800,
|
||||
CLIDR_EL1 = 0xc801,
|
||||
AIDR_EL1 = 0xc807,
|
||||
CSSELR_EL1 = 0xd000,
|
||||
VPIDR_EL2 = 0xe000,
|
||||
VMPIDR_EL2 = 0xe005,
|
||||
SCTLR_EL1 = 0xc080,
|
||||
SCTLR_EL2 = 0xe080,
|
||||
SCTLR_EL3 = 0xf080,
|
||||
ACTLR_EL1 = 0xc081,
|
||||
ACTLR_EL2 = 0xe081,
|
||||
ACTLR_EL3 = 0xf081,
|
||||
CPACR_EL1 = 0xc082,
|
||||
CPTR_EL2 = 0xe08a,
|
||||
CPTR_EL3 = 0xf08a,
|
||||
SCR_EL3 = 0xf088,
|
||||
HCR_EL2 = 0xe088,
|
||||
MDCR_EL2 = 0xe089,
|
||||
MDCR_EL3 = 0xf099,
|
||||
HSTR_EL2 = 0xe08b,
|
||||
HACR_EL2 = 0xe08f,
|
||||
TTBR0_EL1 = 0xc100,
|
||||
TTBR1_EL1 = 0xc101,
|
||||
TTBR0_EL2 = 0xe100,
|
||||
TTBR0_EL3 = 0xf100,
|
||||
VTTBR_EL2 = 0xe108,
|
||||
TCR_EL1 = 0xc102,
|
||||
TCR_EL2 = 0xe102,
|
||||
TCR_EL3 = 0xf102,
|
||||
VTCR_EL2 = 0xe10a,
|
||||
ADFSR_EL1 = 0xc288,
|
||||
AIFSR_EL1 = 0xc289,
|
||||
ADFSR_EL2 = 0xe288,
|
||||
AIFSR_EL2 = 0xe289,
|
||||
ADFSR_EL3 = 0xf288,
|
||||
AIFSR_EL3 = 0xf289,
|
||||
ESR_EL1 = 0xc290,
|
||||
ESR_EL2 = 0xe290,
|
||||
ESR_EL3 = 0xf290,
|
||||
FAR_EL1 = 0xc300,
|
||||
FAR_EL2 = 0xe300,
|
||||
FAR_EL3 = 0xf300,
|
||||
HPFAR_EL2 = 0xe304,
|
||||
PAR_EL1 = 0xc3a0,
|
||||
MAIR_EL1 = 0xc510,
|
||||
MAIR_EL2 = 0xe510,
|
||||
MAIR_EL3 = 0xf510,
|
||||
AMAIR_EL1 = 0xc518,
|
||||
AMAIR_EL2 = 0xe518,
|
||||
AMAIR_EL3 = 0xf518,
|
||||
VBAR_EL1 = 0xc600,
|
||||
VBAR_EL2 = 0xe600,
|
||||
VBAR_EL3 = 0xf600,
|
||||
RVBAR_EL1 = 0xc601,
|
||||
RVBAR_EL2 = 0xe601,
|
||||
RVBAR_EL3 = 0xf601,
|
||||
ISR_EL1 = 0xc608,
|
||||
CONTEXTIDR_EL1 = 0xc681,
|
||||
TPIDR_EL0 = 0xde82,
|
||||
TPIDRRO_EL0 = 0xde83,
|
||||
TPIDR_EL1 = 0xc684,
|
||||
TPIDR_EL2 = 0xe682,
|
||||
TPIDR_EL3 = 0xf682,
|
||||
TEECR32_EL1 = 0x9000,
|
||||
CNTFRQ_EL0 = 0xdf00,
|
||||
CNTPCT_EL0 = 0xdf01,
|
||||
CNTVCT_EL0 = 0xdf02,
|
||||
CNTVOFF_EL2 = 0xe703,
|
||||
CNTKCTL_EL1 = 0xc708,
|
||||
CNTHCTL_EL2 = 0xe708,
|
||||
CNTP_TVAL_EL0 = 0xdf10,
|
||||
CNTP_CTL_EL0 = 0xdf11,
|
||||
CNTP_CVAL_EL0 = 0xdf12,
|
||||
CNTV_TVAL_EL0 = 0xdf18,
|
||||
CNTV_CTL_EL0 = 0xdf19,
|
||||
CNTV_CVAL_EL0 = 0xdf1a,
|
||||
CNTHP_TVAL_EL2 = 0xe710,
|
||||
CNTHP_CTL_EL2 = 0xe711,
|
||||
CNTHP_CVAL_EL2 = 0xe712,
|
||||
CNTPS_TVAL_EL1 = 0xff10,
|
||||
CNTPS_CTL_EL1 = 0xff11,
|
||||
CNTPS_CVAL_EL1= 0xff12,
|
||||
|
||||
PMEVCNTR0_EL0 = 0xdf40,
|
||||
PMEVCNTR1_EL0 = 0xdf41,
|
||||
PMEVCNTR2_EL0 = 0xdf42,
|
||||
PMEVCNTR3_EL0 = 0xdf43,
|
||||
PMEVCNTR4_EL0 = 0xdf44,
|
||||
PMEVCNTR5_EL0 = 0xdf45,
|
||||
PMEVCNTR6_EL0 = 0xdf46,
|
||||
PMEVCNTR7_EL0 = 0xdf47,
|
||||
PMEVCNTR8_EL0 = 0xdf48,
|
||||
PMEVCNTR9_EL0 = 0xdf49,
|
||||
PMEVCNTR10_EL0 = 0xdf4a,
|
||||
PMEVCNTR11_EL0 = 0xdf4b,
|
||||
PMEVCNTR12_EL0 = 0xdf4c,
|
||||
PMEVCNTR13_EL0 = 0xdf4d,
|
||||
PMEVCNTR14_EL0 = 0xdf4e,
|
||||
PMEVCNTR15_EL0 = 0xdf4f,
|
||||
PMEVCNTR16_EL0 = 0xdf50,
|
||||
PMEVCNTR17_EL0 = 0xdf51,
|
||||
PMEVCNTR18_EL0 = 0xdf52,
|
||||
PMEVCNTR19_EL0 = 0xdf53,
|
||||
PMEVCNTR20_EL0 = 0xdf54,
|
||||
PMEVCNTR21_EL0 = 0xdf55,
|
||||
PMEVCNTR22_EL0 = 0xdf56,
|
||||
PMEVCNTR23_EL0 = 0xdf57,
|
||||
PMEVCNTR24_EL0 = 0xdf58,
|
||||
PMEVCNTR25_EL0 = 0xdf59,
|
||||
PMEVCNTR26_EL0 = 0xdf5a,
|
||||
PMEVCNTR27_EL0 = 0xdf5b,
|
||||
PMEVCNTR28_EL0 = 0xdf5c,
|
||||
PMEVCNTR29_EL0 = 0xdf5d,
|
||||
PMEVCNTR30_EL0 = 0xdf5e,
|
||||
|
||||
PMEVTYPER0_EL0 = 0xdf60,
|
||||
PMEVTYPER1_EL0 = 0xdf61,
|
||||
PMEVTYPER2_EL0 = 0xdf62,
|
||||
PMEVTYPER3_EL0 = 0xdf63,
|
||||
PMEVTYPER4_EL0 = 0xdf64,
|
||||
PMEVTYPER5_EL0 = 0xdf65,
|
||||
PMEVTYPER6_EL0 = 0xdf66,
|
||||
PMEVTYPER7_EL0 = 0xdf67,
|
||||
PMEVTYPER8_EL0 = 0xdf68,
|
||||
PMEVTYPER9_EL0 = 0xdf69,
|
||||
PMEVTYPER10_EL0 = 0xdf6a,
|
||||
PMEVTYPER11_EL0 = 0xdf6b,
|
||||
PMEVTYPER12_EL0 = 0xdf6c,
|
||||
PMEVTYPER13_EL0 = 0xdf6d,
|
||||
PMEVTYPER14_EL0 = 0xdf6e,
|
||||
PMEVTYPER15_EL0 = 0xdf6f,
|
||||
PMEVTYPER16_EL0 = 0xdf70,
|
||||
PMEVTYPER17_EL0 = 0xdf71,
|
||||
PMEVTYPER18_EL0 = 0xdf72,
|
||||
PMEVTYPER19_EL0 = 0xdf73,
|
||||
PMEVTYPER20_EL0 = 0xdf74,
|
||||
PMEVTYPER21_EL0 = 0xdf75,
|
||||
PMEVTYPER22_EL0 = 0xdf76,
|
||||
PMEVTYPER23_EL0 = 0xdf77,
|
||||
PMEVTYPER24_EL0 = 0xdf78,
|
||||
PMEVTYPER25_EL0 = 0xdf79,
|
||||
PMEVTYPER26_EL0 = 0xdf7a,
|
||||
PMEVTYPER27_EL0 = 0xdf7b,
|
||||
PMEVTYPER28_EL0 = 0xdf7c,
|
||||
PMEVTYPER29_EL0 = 0xdf7d,
|
||||
PMEVTYPER30_EL0 = 0xdf7e,
|
||||
|
||||
PMCCFILTR_EL0 = 0xdf7f,
|
||||
|
||||
RMR_EL3 = 0xf602,
|
||||
RMR_EL2 = 0xd602,
|
||||
RMR_EL1 = 0xce02,
|
||||
|
||||
// Debug Architecture 5.3, Table 17.
|
||||
MDCCSR_EL0 = A64_SYSREG_ENC(2, 0, 0, 1, 3),
|
||||
MDCCINT_EL1 = A64_SYSREG_ENC(2, 0, 0, 2, 0),
|
||||
DBGDTR_EL0 = A64_SYSREG_ENC(2, 0, 0, 4, 3),
|
||||
DBGDTRRX_EL0 = A64_SYSREG_ENC(2, 0, 0, 5, 3),
|
||||
DBGDTRTX_EL0 = DBGDTRRX_EL0,
|
||||
DBGVCR32_EL2 = A64_SYSREG_ENC(2, 0, 0, 7, 4),
|
||||
OSDTRRX_EL1 = A64_SYSREG_ENC(2, 0, 2, 0, 0),
|
||||
MDSCR_EL1 = A64_SYSREG_ENC(2, 0, 2, 2, 0),
|
||||
OSDTRTX_EL1 = A64_SYSREG_ENC(2, 0, 2, 3, 0),
|
||||
OSECCR_EL11 = A64_SYSREG_ENC(2, 0, 2, 6, 0),
|
||||
|
||||
DBGBVR0_EL1 = A64_SYSREG_ENC(2, 0, 4, 0, 0),
|
||||
DBGBVR1_EL1 = A64_SYSREG_ENC(2, 0, 4, 1, 0),
|
||||
DBGBVR2_EL1 = A64_SYSREG_ENC(2, 0, 4, 2, 0),
|
||||
DBGBVR3_EL1 = A64_SYSREG_ENC(2, 0, 4, 3, 0),
|
||||
DBGBVR4_EL1 = A64_SYSREG_ENC(2, 0, 4, 4, 0),
|
||||
DBGBVR5_EL1 = A64_SYSREG_ENC(2, 0, 4, 5, 0),
|
||||
DBGBVR6_EL1 = A64_SYSREG_ENC(2, 0, 4, 6, 0),
|
||||
DBGBVR7_EL1 = A64_SYSREG_ENC(2, 0, 4, 7, 0),
|
||||
DBGBVR8_EL1 = A64_SYSREG_ENC(2, 0, 4, 8, 0),
|
||||
DBGBVR9_EL1 = A64_SYSREG_ENC(2, 0, 4, 9, 0),
|
||||
DBGBVR10_EL1 = A64_SYSREG_ENC(2, 0, 4, 10, 0),
|
||||
DBGBVR11_EL1 = A64_SYSREG_ENC(2, 0, 4, 11, 0),
|
||||
DBGBVR12_EL1 = A64_SYSREG_ENC(2, 0, 4, 12, 0),
|
||||
DBGBVR13_EL1 = A64_SYSREG_ENC(2, 0, 4, 13, 0),
|
||||
DBGBVR14_EL1 = A64_SYSREG_ENC(2, 0, 4, 14, 0),
|
||||
DBGBVR15_EL1 = A64_SYSREG_ENC(2, 0, 4, 15, 0),
|
||||
|
||||
DBGBCR0_EL1 = A64_SYSREG_ENC(2, 0, 5, 0, 0),
|
||||
DBGBCR1_EL1 = A64_SYSREG_ENC(2, 0, 5, 1, 0),
|
||||
DBGBCR2_EL1 = A64_SYSREG_ENC(2, 0, 5, 2, 0),
|
||||
DBGBCR3_EL1 = A64_SYSREG_ENC(2, 0, 5, 3, 0),
|
||||
DBGBCR4_EL1 = A64_SYSREG_ENC(2, 0, 5, 4, 0),
|
||||
DBGBCR5_EL1 = A64_SYSREG_ENC(2, 0, 5, 5, 0),
|
||||
DBGBCR6_EL1 = A64_SYSREG_ENC(2, 0, 5, 6, 0),
|
||||
DBGBCR7_EL1 = A64_SYSREG_ENC(2, 0, 5, 7, 0),
|
||||
DBGBCR8_EL1 = A64_SYSREG_ENC(2, 0, 5, 8, 0),
|
||||
DBGBCR9_EL1 = A64_SYSREG_ENC(2, 0, 5, 9, 0),
|
||||
DBGBCR10_EL1 = A64_SYSREG_ENC(2, 0, 5, 10, 0),
|
||||
DBGBCR11_EL1 = A64_SYSREG_ENC(2, 0, 5, 11, 0),
|
||||
DBGBCR12_EL1 = A64_SYSREG_ENC(2, 0, 5, 12, 0),
|
||||
DBGBCR13_EL1 = A64_SYSREG_ENC(2, 0, 5, 13, 0),
|
||||
DBGBCR14_EL1 = A64_SYSREG_ENC(2, 0, 5, 14, 0),
|
||||
DBGBCR15_EL1 = A64_SYSREG_ENC(2, 0, 5, 15, 0),
|
||||
|
||||
DBGWVR0_EL1 = A64_SYSREG_ENC(2, 0, 6, 0, 0),
|
||||
DBGWVR1_EL1 = A64_SYSREG_ENC(2, 0, 6, 1, 0),
|
||||
DBGWVR2_EL1 = A64_SYSREG_ENC(2, 0, 6, 2, 0),
|
||||
DBGWVR3_EL1 = A64_SYSREG_ENC(2, 0, 6, 3, 0),
|
||||
DBGWVR4_EL1 = A64_SYSREG_ENC(2, 0, 6, 4, 0),
|
||||
DBGWVR5_EL1 = A64_SYSREG_ENC(2, 0, 6, 5, 0),
|
||||
DBGWVR6_EL1 = A64_SYSREG_ENC(2, 0, 6, 6, 0),
|
||||
DBGWVR7_EL1 = A64_SYSREG_ENC(2, 0, 6, 7, 0),
|
||||
DBGWVR8_EL1 = A64_SYSREG_ENC(2, 0, 6, 8, 0),
|
||||
DBGWVR9_EL1 = A64_SYSREG_ENC(2, 0, 6, 9, 0),
|
||||
DBGWVR10_EL1 = A64_SYSREG_ENC(2, 0, 6, 10, 0),
|
||||
DBGWVR11_EL1 = A64_SYSREG_ENC(2, 0, 6, 11, 0),
|
||||
DBGWVR12_EL1 = A64_SYSREG_ENC(2, 0, 6, 12, 0),
|
||||
DBGWVR13_EL1 = A64_SYSREG_ENC(2, 0, 6, 13, 0),
|
||||
DBGWVR14_EL1 = A64_SYSREG_ENC(2, 0, 6, 14, 0),
|
||||
DBGWVR15_EL1 = A64_SYSREG_ENC(2, 0, 6, 15, 0),
|
||||
|
||||
DBGWCR0_EL1 = A64_SYSREG_ENC(2, 0, 7, 0, 0),
|
||||
DBGWCR1_EL1 = A64_SYSREG_ENC(2, 0, 7, 1, 0),
|
||||
DBGWCR2_EL1 = A64_SYSREG_ENC(2, 0, 7, 2, 0),
|
||||
DBGWCR3_EL1 = A64_SYSREG_ENC(2, 0, 7, 3, 0),
|
||||
DBGWCR4_EL1 = A64_SYSREG_ENC(2, 0, 7, 4, 0),
|
||||
DBGWCR5_EL1 = A64_SYSREG_ENC(2, 0, 7, 5, 0),
|
||||
DBGWCR6_EL1 = A64_SYSREG_ENC(2, 0, 7, 6, 0),
|
||||
DBGWCR7_EL1 = A64_SYSREG_ENC(2, 0, 7, 7, 0),
|
||||
DBGWCR8_EL1 = A64_SYSREG_ENC(2, 0, 7, 8, 0),
|
||||
DBGWCR9_EL1 = A64_SYSREG_ENC(2, 0, 7, 9, 0),
|
||||
DBGWCR10_EL1 = A64_SYSREG_ENC(2, 0, 7, 10, 0),
|
||||
DBGWCR11_EL1 = A64_SYSREG_ENC(2, 0, 7, 11, 0),
|
||||
DBGWCR12_EL1 = A64_SYSREG_ENC(2, 0, 7, 12, 0),
|
||||
DBGWCR13_EL1 = A64_SYSREG_ENC(2, 0, 7, 13, 0),
|
||||
DBGWCR14_EL1 = A64_SYSREG_ENC(2, 0, 7, 14, 0),
|
||||
DBGWCR15_EL1 = A64_SYSREG_ENC(2, 0, 7, 15, 0),
|
||||
|
||||
MDRAR_EL1 = A64_SYSREG_ENC(2, 1, 0, 0, 0),
|
||||
OSLAR_EL1 = A64_SYSREG_ENC(2, 1, 4, 0, 0),
|
||||
OSLSR_EL1 = A64_SYSREG_ENC(2, 1, 4, 1, 0),
|
||||
OSDLR_EL1 = A64_SYSREG_ENC(2, 1, 4, 3, 0),
|
||||
DBGPRCR_EL1 = A64_SYSREG_ENC(2, 1, 4, 4, 0),
|
||||
|
||||
DBGCLAIMSET_EL1 = A64_SYSREG_ENC(2, 7, 6, 8, 0),
|
||||
DBGCLAIMCLR_EL1 = A64_SYSREG_ENC(2, 7, 6, 9, 0),
|
||||
DBGAUTHSTATUS_EL1 = A64_SYSREG_ENC(2, 7, 6, 14, 0),
|
||||
|
||||
DBGDEVID2 = A64_SYSREG_ENC(2, 7, 7, 0, 0),
|
||||
DBGDEVID1 = A64_SYSREG_ENC(2, 7, 7, 1, 0),
|
||||
DBGDEVID0 = A64_SYSREG_ENC(2, 7, 7, 2, 0),
|
||||
|
||||
// The following registers are defined to allow access from AArch64 to
|
||||
// registers which are only used in the AArch32 architecture.
|
||||
DACR32_EL2 = 0xe180,
|
||||
IFSR32_EL2 = 0xe281,
|
||||
TEEHBR32_EL1 = 0x9080,
|
||||
SDER32_EL3 = 0xf089,
|
||||
FPEXC32_EL2 = 0xe298,
|
||||
|
||||
// Cyclone specific system registers
|
||||
CPM_IOACC_CTL_EL3 = 0xff90,
|
||||
|
||||
// Architectural system registers
|
||||
ID_PFR0_EL1 = 0xc008,
|
||||
ID_PFR1_EL1 = 0xc009,
|
||||
ID_DFR0_EL1 = 0xc00a,
|
||||
ID_AFR0_EL1 = 0xc00b,
|
||||
ID_ISAR0_EL1 = 0xc010,
|
||||
ID_ISAR1_EL1 = 0xc011,
|
||||
ID_ISAR2_EL1 = 0xc012,
|
||||
ID_ISAR3_EL1 = 0xc013,
|
||||
ID_ISAR4_EL1 = 0xc014,
|
||||
ID_ISAR5_EL1 = 0xc015,
|
||||
AFSR1_EL1 = 0xc289, // note same as old AIFSR_EL1
|
||||
AFSR0_EL1 = 0xc288, // note same as old ADFSR_EL1
|
||||
REVIDR_EL1 = 0xc006 // note same as old ECOIDR_EL1
|
||||
|
||||
};
|
||||
#undef A64_SYSREG_ENC
|
||||
|
||||
static inline const char *getSystemRegisterName(SystemRegister Reg) {
|
||||
switch(Reg) {
|
||||
default: return NULL; // Caller is responsible for handling invalid value.
|
||||
case SPSR_EL1: return "SPSR_EL1";
|
||||
case ELR_EL1: return "ELR_EL1";
|
||||
case SP_EL0: return "SP_EL0";
|
||||
case SPSel: return "SPSel";
|
||||
case DAIF: return "DAIF";
|
||||
case CurrentEL: return "CurrentEL";
|
||||
case NZCV: return "NZCV";
|
||||
case FPCR: return "FPCR";
|
||||
case FPSR: return "FPSR";
|
||||
case DSPSR: return "DSPSR";
|
||||
case DLR: return "DLR";
|
||||
case SPSR_EL2: return "SPSR_EL2";
|
||||
case ELR_EL2: return "ELR_EL2";
|
||||
case SP_EL1: return "SP_EL1";
|
||||
case SPSR_irq: return "SPSR_irq";
|
||||
case SPSR_abt: return "SPSR_abt";
|
||||
case SPSR_und: return "SPSR_und";
|
||||
case SPSR_fiq: return "SPSR_fiq";
|
||||
case SPSR_EL3: return "SPSR_EL3";
|
||||
case ELR_EL3: return "ELR_EL3";
|
||||
case SP_EL2: return "SP_EL2";
|
||||
case MIDR_EL1: return "MIDR_EL1";
|
||||
case CTR_EL0: return "CTR_EL0";
|
||||
case MPIDR_EL1: return "MPIDR_EL1";
|
||||
case DCZID_EL0: return "DCZID_EL0";
|
||||
case MVFR0_EL1: return "MVFR0_EL1";
|
||||
case MVFR1_EL1: return "MVFR1_EL1";
|
||||
case ID_AA64PFR0_EL1: return "ID_AA64PFR0_EL1";
|
||||
case ID_AA64PFR1_EL1: return "ID_AA64PFR1_EL1";
|
||||
case ID_AA64DFR0_EL1: return "ID_AA64DFR0_EL1";
|
||||
case ID_AA64DFR1_EL1: return "ID_AA64DFR1_EL1";
|
||||
case ID_AA64ISAR0_EL1: return "ID_AA64ISAR0_EL1";
|
||||
case ID_AA64ISAR1_EL1: return "ID_AA64ISAR1_EL1";
|
||||
case ID_AA64MMFR0_EL1: return "ID_AA64MMFR0_EL1";
|
||||
case ID_AA64MMFR1_EL1: return "ID_AA64MMFR1_EL1";
|
||||
case CCSIDR_EL1: return "CCSIDR_EL1";
|
||||
case CLIDR_EL1: return "CLIDR_EL1";
|
||||
case AIDR_EL1: return "AIDR_EL1";
|
||||
case CSSELR_EL1: return "CSSELR_EL1";
|
||||
case VPIDR_EL2: return "VPIDR_EL2";
|
||||
case VMPIDR_EL2: return "VMPIDR_EL2";
|
||||
case SCTLR_EL1: return "SCTLR_EL1";
|
||||
case SCTLR_EL2: return "SCTLR_EL2";
|
||||
case SCTLR_EL3: return "SCTLR_EL3";
|
||||
case ACTLR_EL1: return "ACTLR_EL1";
|
||||
case ACTLR_EL2: return "ACTLR_EL2";
|
||||
case ACTLR_EL3: return "ACTLR_EL3";
|
||||
case CPACR_EL1: return "CPACR_EL1";
|
||||
case CPTR_EL2: return "CPTR_EL2";
|
||||
case CPTR_EL3: return "CPTR_EL3";
|
||||
case SCR_EL3: return "SCR_EL3";
|
||||
case HCR_EL2: return "HCR_EL2";
|
||||
case MDCR_EL2: return "MDCR_EL2";
|
||||
case MDCR_EL3: return "MDCR_EL3";
|
||||
case HSTR_EL2: return "HSTR_EL2";
|
||||
case HACR_EL2: return "HACR_EL2";
|
||||
case TTBR0_EL1: return "TTBR0_EL1";
|
||||
case TTBR1_EL1: return "TTBR1_EL1";
|
||||
case TTBR0_EL2: return "TTBR0_EL2";
|
||||
case TTBR0_EL3: return "TTBR0_EL3";
|
||||
case VTTBR_EL2: return "VTTBR_EL2";
|
||||
case TCR_EL1: return "TCR_EL1";
|
||||
case TCR_EL2: return "TCR_EL2";
|
||||
case TCR_EL3: return "TCR_EL3";
|
||||
case VTCR_EL2: return "VTCR_EL2";
|
||||
case ADFSR_EL2: return "ADFSR_EL2";
|
||||
case AIFSR_EL2: return "AIFSR_EL2";
|
||||
case ADFSR_EL3: return "ADFSR_EL3";
|
||||
case AIFSR_EL3: return "AIFSR_EL3";
|
||||
case ESR_EL1: return "ESR_EL1";
|
||||
case ESR_EL2: return "ESR_EL2";
|
||||
case ESR_EL3: return "ESR_EL3";
|
||||
case FAR_EL1: return "FAR_EL1";
|
||||
case FAR_EL2: return "FAR_EL2";
|
||||
case FAR_EL3: return "FAR_EL3";
|
||||
case HPFAR_EL2: return "HPFAR_EL2";
|
||||
case PAR_EL1: return "PAR_EL1";
|
||||
case MAIR_EL1: return "MAIR_EL1";
|
||||
case MAIR_EL2: return "MAIR_EL2";
|
||||
case MAIR_EL3: return "MAIR_EL3";
|
||||
case AMAIR_EL1: return "AMAIR_EL1";
|
||||
case AMAIR_EL2: return "AMAIR_EL2";
|
||||
case AMAIR_EL3: return "AMAIR_EL3";
|
||||
case VBAR_EL1: return "VBAR_EL1";
|
||||
case VBAR_EL2: return "VBAR_EL2";
|
||||
case VBAR_EL3: return "VBAR_EL3";
|
||||
case RVBAR_EL1: return "RVBAR_EL1";
|
||||
case RVBAR_EL2: return "RVBAR_EL2";
|
||||
case RVBAR_EL3: return "RVBAR_EL3";
|
||||
case ISR_EL1: return "ISR_EL1";
|
||||
case CONTEXTIDR_EL1: return "CONTEXTIDR_EL1";
|
||||
case TPIDR_EL0: return "TPIDR_EL0";
|
||||
case TPIDRRO_EL0: return "TPIDRRO_EL0";
|
||||
case TPIDR_EL1: return "TPIDR_EL1";
|
||||
case TPIDR_EL2: return "TPIDR_EL2";
|
||||
case TPIDR_EL3: return "TPIDR_EL3";
|
||||
case TEECR32_EL1: return "TEECR32_EL1";
|
||||
case CNTFRQ_EL0: return "CNTFRQ_EL0";
|
||||
case CNTPCT_EL0: return "CNTPCT_EL0";
|
||||
case CNTVCT_EL0: return "CNTVCT_EL0";
|
||||
case CNTVOFF_EL2: return "CNTVOFF_EL2";
|
||||
case CNTKCTL_EL1: return "CNTKCTL_EL1";
|
||||
case CNTHCTL_EL2: return "CNTHCTL_EL2";
|
||||
case CNTP_TVAL_EL0: return "CNTP_TVAL_EL0";
|
||||
case CNTP_CTL_EL0: return "CNTP_CTL_EL0";
|
||||
case CNTP_CVAL_EL0: return "CNTP_CVAL_EL0";
|
||||
case CNTV_TVAL_EL0: return "CNTV_TVAL_EL0";
|
||||
case CNTV_CTL_EL0: return "CNTV_CTL_EL0";
|
||||
case CNTV_CVAL_EL0: return "CNTV_CVAL_EL0";
|
||||
case CNTHP_TVAL_EL2: return "CNTHP_TVAL_EL2";
|
||||
case CNTHP_CTL_EL2: return "CNTHP_CTL_EL2";
|
||||
case CNTHP_CVAL_EL2: return "CNTHP_CVAL_EL2";
|
||||
case CNTPS_TVAL_EL1: return "CNTPS_TVAL_EL1";
|
||||
case CNTPS_CTL_EL1: return "CNTPS_CTL_EL1";
|
||||
case CNTPS_CVAL_EL1: return "CNTPS_CVAL_EL1";
|
||||
case DACR32_EL2: return "DACR32_EL2";
|
||||
case IFSR32_EL2: return "IFSR32_EL2";
|
||||
case TEEHBR32_EL1: return "TEEHBR32_EL1";
|
||||
case SDER32_EL3: return "SDER32_EL3";
|
||||
case FPEXC32_EL2: return "FPEXC32_EL2";
|
||||
case PMEVCNTR0_EL0: return "PMEVCNTR0_EL0";
|
||||
case PMEVCNTR1_EL0: return "PMEVCNTR1_EL0";
|
||||
case PMEVCNTR2_EL0: return "PMEVCNTR2_EL0";
|
||||
case PMEVCNTR3_EL0: return "PMEVCNTR3_EL0";
|
||||
case PMEVCNTR4_EL0: return "PMEVCNTR4_EL0";
|
||||
case PMEVCNTR5_EL0: return "PMEVCNTR5_EL0";
|
||||
case PMEVCNTR6_EL0: return "PMEVCNTR6_EL0";
|
||||
case PMEVCNTR7_EL0: return "PMEVCNTR7_EL0";
|
||||
case PMEVCNTR8_EL0: return "PMEVCNTR8_EL0";
|
||||
case PMEVCNTR9_EL0: return "PMEVCNTR9_EL0";
|
||||
case PMEVCNTR10_EL0: return "PMEVCNTR10_EL0";
|
||||
case PMEVCNTR11_EL0: return "PMEVCNTR11_EL0";
|
||||
case PMEVCNTR12_EL0: return "PMEVCNTR12_EL0";
|
||||
case PMEVCNTR13_EL0: return "PMEVCNTR13_EL0";
|
||||
case PMEVCNTR14_EL0: return "PMEVCNTR14_EL0";
|
||||
case PMEVCNTR15_EL0: return "PMEVCNTR15_EL0";
|
||||
case PMEVCNTR16_EL0: return "PMEVCNTR16_EL0";
|
||||
case PMEVCNTR17_EL0: return "PMEVCNTR17_EL0";
|
||||
case PMEVCNTR18_EL0: return "PMEVCNTR18_EL0";
|
||||
case PMEVCNTR19_EL0: return "PMEVCNTR19_EL0";
|
||||
case PMEVCNTR20_EL0: return "PMEVCNTR20_EL0";
|
||||
case PMEVCNTR21_EL0: return "PMEVCNTR21_EL0";
|
||||
case PMEVCNTR22_EL0: return "PMEVCNTR22_EL0";
|
||||
case PMEVCNTR23_EL0: return "PMEVCNTR23_EL0";
|
||||
case PMEVCNTR24_EL0: return "PMEVCNTR24_EL0";
|
||||
case PMEVCNTR25_EL0: return "PMEVCNTR25_EL0";
|
||||
case PMEVCNTR26_EL0: return "PMEVCNTR26_EL0";
|
||||
case PMEVCNTR27_EL0: return "PMEVCNTR27_EL0";
|
||||
case PMEVCNTR28_EL0: return "PMEVCNTR28_EL0";
|
||||
case PMEVCNTR29_EL0: return "PMEVCNTR29_EL0";
|
||||
case PMEVCNTR30_EL0: return "PMEVCNTR30_EL0";
|
||||
case PMEVTYPER0_EL0: return "PMEVTYPER0_EL0";
|
||||
case PMEVTYPER1_EL0: return "PMEVTYPER1_EL0";
|
||||
case PMEVTYPER2_EL0: return "PMEVTYPER2_EL0";
|
||||
case PMEVTYPER3_EL0: return "PMEVTYPER3_EL0";
|
||||
case PMEVTYPER4_EL0: return "PMEVTYPER4_EL0";
|
||||
case PMEVTYPER5_EL0: return "PMEVTYPER5_EL0";
|
||||
case PMEVTYPER6_EL0: return "PMEVTYPER6_EL0";
|
||||
case PMEVTYPER7_EL0: return "PMEVTYPER7_EL0";
|
||||
case PMEVTYPER8_EL0: return "PMEVTYPER8_EL0";
|
||||
case PMEVTYPER9_EL0: return "PMEVTYPER9_EL0";
|
||||
case PMEVTYPER10_EL0: return "PMEVTYPER10_EL0";
|
||||
case PMEVTYPER11_EL0: return "PMEVTYPER11_EL0";
|
||||
case PMEVTYPER12_EL0: return "PMEVTYPER12_EL0";
|
||||
case PMEVTYPER13_EL0: return "PMEVTYPER13_EL0";
|
||||
case PMEVTYPER14_EL0: return "PMEVTYPER14_EL0";
|
||||
case PMEVTYPER15_EL0: return "PMEVTYPER15_EL0";
|
||||
case PMEVTYPER16_EL0: return "PMEVTYPER16_EL0";
|
||||
case PMEVTYPER17_EL0: return "PMEVTYPER17_EL0";
|
||||
case PMEVTYPER18_EL0: return "PMEVTYPER18_EL0";
|
||||
case PMEVTYPER19_EL0: return "PMEVTYPER19_EL0";
|
||||
case PMEVTYPER20_EL0: return "PMEVTYPER20_EL0";
|
||||
case PMEVTYPER21_EL0: return "PMEVTYPER21_EL0";
|
||||
case PMEVTYPER22_EL0: return "PMEVTYPER22_EL0";
|
||||
case PMEVTYPER23_EL0: return "PMEVTYPER23_EL0";
|
||||
case PMEVTYPER24_EL0: return "PMEVTYPER24_EL0";
|
||||
case PMEVTYPER25_EL0: return "PMEVTYPER25_EL0";
|
||||
case PMEVTYPER26_EL0: return "PMEVTYPER26_EL0";
|
||||
case PMEVTYPER27_EL0: return "PMEVTYPER27_EL0";
|
||||
case PMEVTYPER28_EL0: return "PMEVTYPER28_EL0";
|
||||
case PMEVTYPER29_EL0: return "PMEVTYPER29_EL0";
|
||||
case PMEVTYPER30_EL0: return "PMEVTYPER30_EL0";
|
||||
case PMCCFILTR_EL0: return "PMCCFILTR_EL0";
|
||||
case RMR_EL3: return "RMR_EL3";
|
||||
case RMR_EL2: return "RMR_EL2";
|
||||
case RMR_EL1: return "RMR_EL1";
|
||||
case CPM_IOACC_CTL_EL3: return "CPM_IOACC_CTL_EL3";
|
||||
case MDCCSR_EL0: return "MDCCSR_EL0";
|
||||
case MDCCINT_EL1: return "MDCCINT_EL1";
|
||||
case DBGDTR_EL0: return "DBGDTR_EL0";
|
||||
case DBGDTRRX_EL0: return "DBGDTRRX_EL0";
|
||||
case DBGVCR32_EL2: return "DBGVCR32_EL2";
|
||||
case OSDTRRX_EL1: return "OSDTRRX_EL1";
|
||||
case MDSCR_EL1: return "MDSCR_EL1";
|
||||
case OSDTRTX_EL1: return "OSDTRTX_EL1";
|
||||
case OSECCR_EL11: return "OSECCR_EL11";
|
||||
  case DBGBVR0_EL1: return "DBGBVR0_EL1";
  case DBGBVR1_EL1: return "DBGBVR1_EL1";
  case DBGBVR2_EL1: return "DBGBVR2_EL1";
  case DBGBVR3_EL1: return "DBGBVR3_EL1";
  case DBGBVR4_EL1: return "DBGBVR4_EL1";
  case DBGBVR5_EL1: return "DBGBVR5_EL1";
  case DBGBVR6_EL1: return "DBGBVR6_EL1";
  case DBGBVR7_EL1: return "DBGBVR7_EL1";
  case DBGBVR8_EL1: return "DBGBVR8_EL1";
  case DBGBVR9_EL1: return "DBGBVR9_EL1";
  case DBGBVR10_EL1: return "DBGBVR10_EL1";
  case DBGBVR11_EL1: return "DBGBVR11_EL1";
  case DBGBVR12_EL1: return "DBGBVR12_EL1";
  case DBGBVR13_EL1: return "DBGBVR13_EL1";
  case DBGBVR14_EL1: return "DBGBVR14_EL1";
  case DBGBVR15_EL1: return "DBGBVR15_EL1";
  case DBGBCR0_EL1: return "DBGBCR0_EL1";
  case DBGBCR1_EL1: return "DBGBCR1_EL1";
  case DBGBCR2_EL1: return "DBGBCR2_EL1";
  case DBGBCR3_EL1: return "DBGBCR3_EL1";
  case DBGBCR4_EL1: return "DBGBCR4_EL1";
  case DBGBCR5_EL1: return "DBGBCR5_EL1";
  case DBGBCR6_EL1: return "DBGBCR6_EL1";
  case DBGBCR7_EL1: return "DBGBCR7_EL1";
  case DBGBCR8_EL1: return "DBGBCR8_EL1";
  case DBGBCR9_EL1: return "DBGBCR9_EL1";
  case DBGBCR10_EL1: return "DBGBCR10_EL1";
  case DBGBCR11_EL1: return "DBGBCR11_EL1";
  case DBGBCR12_EL1: return "DBGBCR12_EL1";
  case DBGBCR13_EL1: return "DBGBCR13_EL1";
  case DBGBCR14_EL1: return "DBGBCR14_EL1";
  case DBGBCR15_EL1: return "DBGBCR15_EL1";
  case DBGWVR0_EL1: return "DBGWVR0_EL1";
  case DBGWVR1_EL1: return "DBGWVR1_EL1";
  case DBGWVR2_EL1: return "DBGWVR2_EL1";
  case DBGWVR3_EL1: return "DBGWVR3_EL1";
  case DBGWVR4_EL1: return "DBGWVR4_EL1";
  case DBGWVR5_EL1: return "DBGWVR5_EL1";
  case DBGWVR6_EL1: return "DBGWVR6_EL1";
  case DBGWVR7_EL1: return "DBGWVR7_EL1";
  case DBGWVR8_EL1: return "DBGWVR8_EL1";
  case DBGWVR9_EL1: return "DBGWVR9_EL1";
  case DBGWVR10_EL1: return "DBGWVR10_EL1";
  case DBGWVR11_EL1: return "DBGWVR11_EL1";
  case DBGWVR12_EL1: return "DBGWVR12_EL1";
  case DBGWVR13_EL1: return "DBGWVR13_EL1";
  case DBGWVR14_EL1: return "DBGWVR14_EL1";
  case DBGWVR15_EL1: return "DBGWVR15_EL1";
  case DBGWCR0_EL1: return "DBGWCR0_EL1";
  case DBGWCR1_EL1: return "DBGWCR1_EL1";
  case DBGWCR2_EL1: return "DBGWCR2_EL1";
  case DBGWCR3_EL1: return "DBGWCR3_EL1";
  case DBGWCR4_EL1: return "DBGWCR4_EL1";
  case DBGWCR5_EL1: return "DBGWCR5_EL1";
  case DBGWCR6_EL1: return "DBGWCR6_EL1";
  case DBGWCR7_EL1: return "DBGWCR7_EL1";
  case DBGWCR8_EL1: return "DBGWCR8_EL1";
  case DBGWCR9_EL1: return "DBGWCR9_EL1";
  case DBGWCR10_EL1: return "DBGWCR10_EL1";
  case DBGWCR11_EL1: return "DBGWCR11_EL1";
  case DBGWCR12_EL1: return "DBGWCR12_EL1";
  case DBGWCR13_EL1: return "DBGWCR13_EL1";
  case DBGWCR14_EL1: return "DBGWCR14_EL1";
  case DBGWCR15_EL1: return "DBGWCR15_EL1";
  case MDRAR_EL1: return "MDRAR_EL1";
  case OSLAR_EL1: return "OSLAR_EL1";
  case OSLSR_EL1: return "OSLSR_EL1";
  case OSDLR_EL1: return "OSDLR_EL1";
  case DBGPRCR_EL1: return "DBGPRCR_EL1";
  case DBGCLAIMSET_EL1: return "DBGCLAIMSET_EL1";
  case DBGCLAIMCLR_EL1: return "DBGCLAIMCLR_EL1";
  case DBGAUTHSTATUS_EL1: return "DBGAUTHSTATUS_EL1";
  case DBGDEVID2: return "DBGDEVID2";
  case DBGDEVID1: return "DBGDEVID1";
  case DBGDEVID0: return "DBGDEVID0";
  case ID_PFR0_EL1: return "ID_PFR0_EL1";
  case ID_PFR1_EL1: return "ID_PFR1_EL1";
  case ID_DFR0_EL1: return "ID_DFR0_EL1";
  case ID_AFR0_EL1: return "ID_AFR0_EL1";
  case ID_ISAR0_EL1: return "ID_ISAR0_EL1";
  case ID_ISAR1_EL1: return "ID_ISAR1_EL1";
  case ID_ISAR2_EL1: return "ID_ISAR2_EL1";
  case ID_ISAR3_EL1: return "ID_ISAR3_EL1";
  case ID_ISAR4_EL1: return "ID_ISAR4_EL1";
  case ID_ISAR5_EL1: return "ID_ISAR5_EL1";
  case AFSR1_EL1: return "AFSR1_EL1";
  case AFSR0_EL1: return "AFSR0_EL1";
  case REVIDR_EL1: return "REVIDR_EL1";
  }
}

enum CPSRField {
  InvalidCPSRField = 0xff,
  cpsr_SPSel = 0x5,
  cpsr_DAIFSet = 0x1e,
  cpsr_DAIFClr = 0x1f
};

static inline const char *getCPSRFieldName(CPSRField Val) {
  switch(Val) {
  default: assert(0 && "Invalid CPSR field value!");
  case cpsr_SPSel: return "SPSel";
  case cpsr_DAIFSet: return "DAIFSet";
  case cpsr_DAIFClr: return "DAIFClr";
  }
}

} // end namespace ARM64SYS

namespace ARM64II {
/// Target Operand Flag enum.
enum TOF {
  //===------------------------------------------------------------------===//
  // ARM64 Specific MachineOperand flags.

  MO_NO_FLAG,

  MO_FRAGMENT = 0x7,

  /// MO_PAGE - A symbol operand with this flag represents the pc-relative
  /// offset of the 4K page containing the symbol. This is used with the
  /// ADRP instruction.
  MO_PAGE = 1,

  /// MO_PAGEOFF - A symbol operand with this flag represents the offset of
  /// that symbol within a 4K page. This offset is added to the page address
  /// to produce the complete address.
  MO_PAGEOFF = 2,

  /// MO_G3 - A symbol operand with this flag (granule 3) represents the high
  /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction
  MO_G3 = 3,

  /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits
  /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction
  MO_G2 = 4,

  /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits
  /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction
  MO_G1 = 5,

  /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits
  /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
  MO_G0 = 6,

  /// MO_GOT - This flag indicates that a symbol operand represents the
  /// address of the GOT entry for the symbol, rather than the address of
  /// the symbol itself.
  MO_GOT = 8,

  /// MO_NC - Indicates whether the linker is expected to check the symbol
  /// reference for overflow. For example in an ADRP/ADD pair of relocations
  /// the ADRP usually does check, but not the ADD.
  MO_NC = 0x10,

  /// MO_TLS - Indicates that the operand being accessed is some kind of
  /// thread-local symbol. On Darwin, only one type of thread-local access
  /// exists (pre linker-relaxation), but on ELF the TLSModel used for the
  /// referee will affect interpretation.
  MO_TLS = 0x20
};
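
// Illustrative example (editorial sketch, not part of the enum above): a
// typical global-address materialization pairs the fragment flags, e.g.
//   adrp x0, global             ; symbol operand flagged MO_PAGE
//   add  x0, x0, :lo12:global   ; symbol operand flagged MO_PAGEOFF | MO_NC
// so the ADRP relocation is range-checked while the ADD one is not.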
} // end namespace ARM64II

} // end namespace llvm

#endif
237
lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp
Normal file
@ -0,0 +1,237 @@
|
||||
//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file handles ELF-specific object emission, converting LLVM's internal
|
||||
// fixups into the appropriate relocations.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "MCTargetDesc/ARM64FixupKinds.h"
|
||||
#include "MCTargetDesc/ARM64MCExpr.h"
|
||||
#include "MCTargetDesc/ARM64MCTargetDesc.h"
|
||||
#include "llvm/MC/MCELFObjectWriter.h"
|
||||
#include "llvm/MC/MCValue.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class ARM64ELFObjectWriter : public MCELFObjectTargetWriter {
|
||||
public:
|
||||
ARM64ELFObjectWriter(uint8_t OSABI);
|
||||
|
||||
virtual ~ARM64ELFObjectWriter();
|
||||
|
||||
protected:
|
||||
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
|
||||
bool IsPCRel) const override;
|
||||
|
||||
private:
|
||||
};
|
||||
}
|
||||
|
||||
ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI)
|
||||
: MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
|
||||
/*HasRelocationAddend*/ true) {}
|
||||
|
||||
ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {}
|
||||
|
||||
unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target,
|
||||
const MCFixup &Fixup,
|
||||
bool IsPCRel) const {
|
||||
ARM64MCExpr::VariantKind RefKind =
|
||||
static_cast<ARM64MCExpr::VariantKind>(Target.getRefKind());
|
||||
ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind);
|
||||
bool IsNC = ARM64MCExpr::isNotChecked(RefKind);
|
||||
|
||||
assert((!Target.getSymA() ||
|
||||
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) &&
|
||||
"Should only be expression-level modifiers here");
|
||||
|
||||
assert((!Target.getSymB() ||
|
||||
Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) &&
|
||||
"Should only be expression-level modifiers here");
|
||||
|
||||
if (IsPCRel) {
|
||||
switch ((unsigned)Fixup.getKind()) {
|
||||
case FK_Data_2:
|
||||
return ELF::R_AARCH64_PREL16;
|
||||
case FK_Data_4:
|
||||
return ELF::R_AARCH64_PREL32;
|
||||
case FK_Data_8:
|
||||
return ELF::R_AARCH64_PREL64;
|
||||
case ARM64::fixup_arm64_pcrel_adr_imm21:
|
||||
llvm_unreachable("No ELF relocations supported for ADR at the moment");
|
||||
case ARM64::fixup_arm64_pcrel_adrp_imm21:
|
||||
if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC)
|
||||
return ELF::R_AARCH64_ADR_PREL_PG_HI21;
|
||||
if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC)
|
||||
return ELF::R_AARCH64_ADR_GOT_PAGE;
|
||||
if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
|
||||
if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC)
|
||||
return ELF::R_AARCH64_TLSDESC_ADR_PAGE;
|
||||
llvm_unreachable("invalid symbol kind for ADRP relocation");
|
||||
case ARM64::fixup_arm64_pcrel_branch26:
|
||||
return ELF::R_AARCH64_JUMP26;
|
||||
case ARM64::fixup_arm64_pcrel_call26:
|
||||
return ELF::R_AARCH64_CALL26;
|
||||
case ARM64::fixup_arm64_pcrel_imm19:
|
||||
return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
|
||||
default:
|
||||
llvm_unreachable("Unsupported pc-relative fixup kind");
|
||||
}
|
||||
} else {
|
||||
switch ((unsigned)Fixup.getKind()) {
|
||||
case FK_Data_2:
|
||||
return ELF::R_AARCH64_ABS16;
|
||||
case FK_Data_4:
|
||||
return ELF::R_AARCH64_ABS32;
|
||||
case FK_Data_8:
|
||||
return ELF::R_AARCH64_ABS64;
|
||||
case ARM64::fixup_arm64_add_imm12:
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC)
|
||||
return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
|
||||
return ELF::R_AARCH64_ADD_ABS_LO12_NC;
|
||||
|
||||
report_fatal_error("invalid fixup for add (uimm12) instruction");
|
||||
return 0;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale1:
|
||||
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
|
||||
return ELF::R_AARCH64_LDST8_ABS_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
|
||||
|
||||
report_fatal_error("invalid fixup for 8-bit load/store instruction");
|
||||
return 0;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale2:
|
||||
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
|
||||
return ELF::R_AARCH64_LDST16_ABS_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
|
||||
|
||||
report_fatal_error("invalid fixup for 16-bit load/store instruction");
|
||||
return 0;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale4:
|
||||
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
|
||||
return ELF::R_AARCH64_LDST32_ABS_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
|
||||
|
||||
report_fatal_error("invalid fixup for 32-bit load/store instruction");
|
||||
return 0;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale8:
|
||||
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
|
||||
return ELF::R_AARCH64_LDST64_ABS_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_GOT && IsNC)
|
||||
return ELF::R_AARCH64_LD64_GOT_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
|
||||
if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC)
|
||||
return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
|
||||
if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC)
|
||||
return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
|
||||
|
||||
report_fatal_error("invalid fixup for 64-bit load/store instruction");
|
||||
return 0;
|
||||
case ARM64::fixup_arm64_ldst_imm12_scale16:
|
||||
if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
|
||||
return ELF::R_AARCH64_LDST128_ABS_LO12_NC;
|
||||
|
||||
report_fatal_error("invalid fixup for 128-bit load/store instruction");
|
||||
return 0;
|
||||
case ARM64::fixup_arm64_movw:
|
||||
if (RefKind == ARM64MCExpr::VK_ABS_G3)
|
||||
return ELF::R_AARCH64_MOVW_UABS_G3;
|
||||
if (RefKind == ARM64MCExpr::VK_ABS_G2)
|
||||
return ELF::R_AARCH64_MOVW_UABS_G2;
|
||||
if (RefKind == ARM64MCExpr::VK_ABS_G2_NC)
|
||||
return ELF::R_AARCH64_MOVW_UABS_G2_NC;
|
||||
if (RefKind == ARM64MCExpr::VK_ABS_G1)
|
||||
return ELF::R_AARCH64_MOVW_UABS_G1;
|
||||
if (RefKind == ARM64MCExpr::VK_ABS_G1_NC)
|
||||
return ELF::R_AARCH64_MOVW_UABS_G1_NC;
|
||||
if (RefKind == ARM64MCExpr::VK_ABS_G0)
|
||||
return ELF::R_AARCH64_MOVW_UABS_G0;
|
||||
if (RefKind == ARM64MCExpr::VK_ABS_G0_NC)
|
||||
return ELF::R_AARCH64_MOVW_UABS_G0_NC;
|
||||
if (RefKind == ARM64MCExpr::VK_DTPREL_G2)
|
||||
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
|
||||
if (RefKind == ARM64MCExpr::VK_DTPREL_G1)
|
||||
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
|
||||
if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC)
|
||||
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
|
||||
if (RefKind == ARM64MCExpr::VK_DTPREL_G0)
|
||||
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
|
||||
if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC)
|
||||
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
|
||||
if (RefKind == ARM64MCExpr::VK_TPREL_G2)
|
||||
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
|
||||
if (RefKind == ARM64MCExpr::VK_TPREL_G1)
|
||||
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
|
||||
if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC)
|
||||
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
|
||||
if (RefKind == ARM64MCExpr::VK_TPREL_G0)
|
||||
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
|
||||
if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC)
|
||||
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
|
||||
if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1)
|
||||
return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
|
||||
if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC)
|
||||
return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
|
||||
report_fatal_error("invalid fixup for movz/movk instruction");
|
||||
return 0;
|
||||
case ARM64::fixup_arm64_tlsdesc_call:
|
||||
return ELF::R_AARCH64_TLSDESC_CALL;
|
||||
default:
|
||||
llvm_unreachable("Unknown ELF relocation type");
|
||||
}
|
||||
}
|
||||
|
||||
llvm_unreachable("Unimplemented fixup -> relocation");
|
||||
}
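
// Worked example (illustrative, not from the original file): for
// "add x0, x0, :lo12:var" the assembler records ARM64::fixup_arm64_add_imm12
// with a symbol location of VK_ABS and the not-checked bit set, so the
// non-PC-relative switch above selects ELF::R_AARCH64_ADD_ABS_LO12_NC.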
|
||||
|
||||
MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS,
|
||||
uint8_t OSABI) {
|
||||
MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI);
|
||||
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
|
||||
}
158
lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp
Normal file
@ -0,0 +1,158 @@
|
||||
//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file assembles .s files and emits AArch64 ELF .o object files. Different
|
||||
// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit
|
||||
// regions of data and code.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/MC/MCELFStreamer.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/MC/MCAsmBackend.h"
|
||||
#include "llvm/MC/MCAssembler.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCELF.h"
|
||||
#include "llvm/MC/MCELFStreamer.h"
|
||||
#include "llvm/MC/MCELFSymbolFlags.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCObjectStreamer.h"
|
||||
#include "llvm/MC/MCSection.h"
|
||||
#include "llvm/MC/MCSectionELF.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/MC/MCValue.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ELF.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
|
||||
/// the appropriate points in the object files. These symbols are defined in the
|
||||
/// AArch64 ELF ABI:
|
||||
/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
|
||||
///
|
||||
/// In brief: $x or $d should be emitted at the start of each contiguous region
|
||||
/// of A64 code or data in a section. In practice, this emission does not rely
|
||||
/// on explicit assembler directives but on inherent properties of the
|
||||
/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
|
||||
/// instruction).
|
||||
///
|
||||
/// As a result this system is orthogonal to the DataRegion infrastructure used
|
||||
/// by MachO. Beware!
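///
/// For illustration (an added example, not in the ABI text), a fragment such
/// as
///     add x0, x0, x0    // "$x" is emitted before the first instruction
///     .word 0x1234      // "$d" is emitted before the first data directive
/// gets one mapping symbol at each code/data transition.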
|
||||
class ARM64ELFStreamer : public MCELFStreamer {
|
||||
public:
|
||||
ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
|
||||
MCCodeEmitter *Emitter)
|
||||
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
|
||||
LastEMS(EMS_None) {}
|
||||
|
||||
~ARM64ELFStreamer() {}
|
||||
|
||||
virtual void ChangeSection(const MCSection *Section,
|
||||
const MCExpr *Subsection) {
|
||||
// We have to keep track of the mapping symbol state of any sections we
|
||||
// use. Each one should start off as EMS_None, which is provided as the
|
||||
// default constructor by DenseMap::lookup.
|
||||
LastMappingSymbols[getPreviousSection().first] = LastEMS;
|
||||
LastEMS = LastMappingSymbols.lookup(Section);
|
||||
|
||||
MCELFStreamer::ChangeSection(Section, Subsection);
|
||||
}
|
||||
|
||||
/// This function is the one used to emit instruction data into the ELF
|
||||
/// streamer. We override it to add the appropriate mapping symbol if
|
||||
/// necessary.
|
||||
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) {
|
||||
EmitA64MappingSymbol();
|
||||
MCELFStreamer::EmitInstruction(Inst, STI);
|
||||
}
|
||||
|
||||
/// This is one of the functions used to emit data into an ELF section, so the
|
||||
/// ARM64 streamer overrides it to add the appropriate mapping symbol ($d)
|
||||
/// if necessary.
|
||||
virtual void EmitBytes(StringRef Data) {
|
||||
EmitDataMappingSymbol();
|
||||
MCELFStreamer::EmitBytes(Data);
|
||||
}
|
||||
|
||||
/// This is one of the functions used to emit data into an ELF section, so the
|
||||
/// ARM64 streamer overrides it to add the appropriate mapping symbol ($d)
|
||||
/// if necessary.
|
||||
virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {
|
||||
EmitDataMappingSymbol();
|
||||
MCELFStreamer::EmitValueImpl(Value, Size);
|
||||
}
|
||||
|
||||
private:
|
||||
enum ElfMappingSymbol {
|
||||
EMS_None,
|
||||
EMS_A64,
|
||||
EMS_Data
|
||||
};
|
||||
|
||||
void EmitDataMappingSymbol() {
|
||||
if (LastEMS == EMS_Data)
|
||||
return;
|
||||
EmitMappingSymbol("$d");
|
||||
LastEMS = EMS_Data;
|
||||
}
|
||||
|
||||
void EmitA64MappingSymbol() {
|
||||
if (LastEMS == EMS_A64)
|
||||
return;
|
||||
EmitMappingSymbol("$x");
|
||||
LastEMS = EMS_A64;
|
||||
}
|
||||
|
||||
void EmitMappingSymbol(StringRef Name) {
|
||||
MCSymbol *Start = getContext().CreateTempSymbol();
|
||||
EmitLabel(Start);
|
||||
|
||||
MCSymbol *Symbol = getContext().GetOrCreateSymbol(
|
||||
Name + "." + Twine(MappingSymbolCounter++));
|
||||
|
||||
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
|
||||
MCELF::SetType(SD, ELF::STT_NOTYPE);
|
||||
MCELF::SetBinding(SD, ELF::STB_LOCAL);
|
||||
SD.setExternal(false);
|
||||
Symbol->setSection(*getCurrentSection().first);
|
||||
|
||||
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
|
||||
Symbol->setVariableValue(Value);
|
||||
}
|
||||
|
||||
int64_t MappingSymbolCounter;
|
||||
|
||||
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
|
||||
ElfMappingSymbol LastEMS;
|
||||
|
||||
/// @}
|
||||
};
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
|
||||
raw_ostream &OS, MCCodeEmitter *Emitter,
|
||||
bool RelaxAll, bool NoExecStack) {
|
||||
ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter);
|
||||
if (RelaxAll)
|
||||
S->getAssembler().setRelaxAll(true);
|
||||
if (NoExecStack)
|
||||
S->getAssembler().setNoExecStack(true);
|
||||
return S;
|
||||
}
|
||||
}
26
lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h
Normal file
@ -0,0 +1,26 @@
//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements ELF streamer information for the ARM64 backend.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64_ELF_STREAMER_H
#define LLVM_AARCH64_ELF_STREAMER_H

#include "llvm/MC/MCELFStreamer.h"

namespace llvm {

MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
                                      raw_ostream &OS, MCCodeEmitter *Emitter,
                                      bool RelaxAll, bool NoExecStack);
}

#endif // LLVM_AARCH64_ELF_STREAMER_H
72
lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h
Normal file
@ -0,0 +1,72 @@
//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ARM64FIXUPKINDS_H
#define LLVM_ARM64FIXUPKINDS_H

#include "llvm/MC/MCFixup.h"

namespace llvm {
namespace ARM64 {

enum Fixups {
  // fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into
  // an ADR instruction.
  fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind,

  // fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into
  // an ADRP instruction.
  fixup_arm64_pcrel_adrp_imm21,

  // fixup_arm64_add_imm12 - 12-bit fixup for add/sub instructions.
  // No alignment adjustment. All value bits are encoded.
  fixup_arm64_add_imm12,

  // fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and
  // store instructions.
  fixup_arm64_ldst_imm12_scale1,
  fixup_arm64_ldst_imm12_scale2,
  fixup_arm64_ldst_imm12_scale4,
  fixup_arm64_ldst_imm12_scale8,
  fixup_arm64_ldst_imm12_scale16,

  // fixup_arm64_movw - 16-bit fixup for the immediate of a MOVZ or MOVK
  // instruction; the expression modifier selects the granule and relocation.
  fixup_arm64_movw,

  // fixup_arm64_pcrel_branch14 - The high 14 bits of a 16-bit pc-relative
  // immediate, used by test-bit-and-branch instructions.
  fixup_arm64_pcrel_branch14,

  // fixup_arm64_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
  // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this
  // is not used as part of a lo/hi pair and thus generates relocations
  // directly when necessary.
  fixup_arm64_pcrel_imm19,

  // fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
  // immediate.
  fixup_arm64_pcrel_branch26,

  // fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
  // immediate. Distinguished from branch26 only on ELF.
  fixup_arm64_pcrel_call26,

  // fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF
  // R_AARCH64_TLSDESC_CALL relocation.
  fixup_arm64_tlsdesc_call,

  // Marker
  LastTargetFixupKind,
  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
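
// Illustrative instruction forms (editorial examples, not exhaustive) that
// carry these fixups while their operand is still a symbolic expression:
//   adr  x0, label               -> fixup_arm64_pcrel_adr_imm21
//   adrp x0, symbol              -> fixup_arm64_pcrel_adrp_imm21
//   ldr  x0, [x1, :lo12:symbol]  -> fixup_arm64_ldst_imm12_scale8
//   tbz  x0, #3, target          -> fixup_arm64_pcrel_branch14
//   bl   callee                  -> fixup_arm64_pcrel_call26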

} // end namespace ARM64
} // end namespace llvm

#endif
92
lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp
Normal file
@ -0,0 +1,92 @@
|
||||
//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declarations of the ARM64MCAsmInfo properties.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARM64MCAsmInfo.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
using namespace llvm;
|
||||
|
||||
enum AsmWriterVariantTy {
|
||||
Default = -1,
|
||||
Generic = 0,
|
||||
Apple = 1
|
||||
};
|
||||
|
||||
static cl::opt<AsmWriterVariantTy> AsmWriterVariant(
|
||||
"arm64-neon-syntax", cl::init(Default),
|
||||
cl::desc("Choose style of NEON code to emit from ARM64 backend:"),
|
||||
cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"),
|
||||
clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"),
|
||||
clEnumValEnd));
|
||||
|
||||
ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() {
|
||||
// We prefer NEON instructions to be printed in the short form.
|
||||
AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant;
|
||||
|
||||
PrivateGlobalPrefix = "L";
|
||||
SeparatorString = "%%";
|
||||
CommentString = ";";
|
||||
PointerSize = CalleeSaveStackSlotSize = 8;
|
||||
|
||||
AlignmentIsInBytes = false;
|
||||
UsesELFSectionDirectiveForBSS = true;
|
||||
SupportsDebugInformation = true;
|
||||
UseDataRegionDirectives = true;
|
||||
|
||||
ExceptionsType = ExceptionHandling::DwarfCFI;
|
||||
}
|
||||
|
||||
const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol(
|
||||
const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const {
|
||||
// On Darwin, we can reference dwarf symbols with foo@GOT-., which
|
||||
// is an indirect pc-relative reference. The default implementation
|
||||
// won't reference using the GOT, so we need this target-specific
|
||||
// version.
|
||||
MCContext &Context = Streamer.getContext();
|
||||
const MCExpr *Res =
|
||||
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
|
||||
MCSymbol *PCSym = Context.CreateTempSymbol();
|
||||
Streamer.EmitLabel(PCSym);
|
||||
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
|
||||
return MCBinaryExpr::CreateSub(Res, PC, Context);
|
||||
}
|
||||
|
||||
ARM64MCAsmInfoELF::ARM64MCAsmInfoELF() {
|
||||
// We prefer NEON instructions to be printed in the short form.
|
||||
AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant;
|
||||
|
||||
PointerSize = 8;
|
||||
|
||||
// ".comm align is in bytes but .align is pow-2."
|
||||
AlignmentIsInBytes = false;
|
||||
|
||||
CommentString = "//";
|
||||
PrivateGlobalPrefix = ".L";
|
||||
Code32Directive = ".code\t32";
|
||||
|
||||
Data16bitsDirective = "\t.hword\t";
|
||||
Data32bitsDirective = "\t.word\t";
|
||||
Data64bitsDirective = "\t.xword\t";
|
||||
|
||||
UseDataRegionDirectives = false;
|
||||
|
||||
WeakRefDirective = "\t.weak\t";
|
||||
|
||||
HasLEB128 = true;
|
||||
SupportsDebugInformation = true;
|
||||
|
||||
// Exceptions handling
|
||||
ExceptionsType = ExceptionHandling::DwarfCFI;
|
||||
}
36
lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h
Normal file
@ -0,0 +1,36 @@
|
||||
//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declaration of the ARM64MCAsmInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef ARM64TARGETASMINFO_H
|
||||
#define ARM64TARGETASMINFO_H
|
||||
|
||||
#include "llvm/MC/MCAsmInfoDarwin.h"
|
||||
|
||||
namespace llvm {
|
||||
class Target;
|
||||
class StringRef;
|
||||
class MCStreamer;
|
||||
struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin {
|
||||
explicit ARM64MCAsmInfoDarwin();
|
||||
virtual const MCExpr *getExprForPersonalitySymbol(const MCSymbol *Sym,
|
||||
unsigned Encoding,
|
||||
MCStreamer &Streamer) const;
|
||||
};
|
||||
|
||||
struct ARM64MCAsmInfoELF : public MCAsmInfo {
|
||||
explicit ARM64MCAsmInfoELF();
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
563
lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp
Normal file
@ -0,0 +1,563 @@
|
||||
//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the ARM64MCCodeEmitter class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "mccodeemitter"
|
||||
#include "MCTargetDesc/ARM64AddressingModes.h"
|
||||
#include "MCTargetDesc/ARM64BaseInfo.h"
|
||||
#include "MCTargetDesc/ARM64FixupKinds.h"
|
||||
#include "MCTargetDesc/ARM64MCExpr.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
|
||||
STATISTIC(MCNumFixups, "Number of MC fixups created.");
|
||||
|
||||
namespace {
|
||||
|
||||
class ARM64MCCodeEmitter : public MCCodeEmitter {
|
||||
MCContext &Ctx;
|
||||
|
||||
ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
public:
|
||||
ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
|
||||
MCContext &ctx)
|
||||
: Ctx(ctx) {}
|
||||
|
||||
~ARM64MCCodeEmitter() {}
|
||||
|
||||
// getBinaryCodeForInstr - TableGen'erated function for getting the
|
||||
// binary encoding for an instruction.
|
||||
uint64_t getBinaryCodeForInstr(const MCInst &MI,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getMachineOpValue - Return binary encoding of operand. If the machine
|
||||
/// operand requires relocation, record the relocation and return zero.
|
||||
unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getAMIndexed8OpValue - Return encoding info for base register
|
||||
/// and 12-bit unsigned immediate attached to a load, store or prfm
|
||||
/// instruction. If operand requires a relocation, record it and
|
||||
/// return zero in that part of the encoding.
|
||||
template <uint32_t FixupKind>
|
||||
uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
|
||||
/// target.
|
||||
uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
|
||||
/// the 2-bit shift field.
|
||||
uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getCondBranchTargetOpValue - Return the encoded value for a conditional
|
||||
/// branch target.
|
||||
uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
|
||||
/// branch target.
|
||||
uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getBranchTargetOpValue - Return the encoded value for an unconditional
|
||||
/// branch target.
|
||||
uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getMoveWideImmOpValue - Return the encoded value for the immediate operand
|
||||
/// of a MOVZ or MOVK instruction.
|
||||
uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getVecShifterOpValue - Return the encoded value for the vector shifter.
|
||||
uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getMoveVecShifterOpValue - Return the encoded value for the vector move
|
||||
/// shifter (MSL).
|
||||
uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getFixedPointScaleOpValue - Return the encoded value for the
|
||||
// FP-to-fixed-point scale factor.
|
||||
uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
/// getSIMDShift64OpValue - Return the encoded value for the
|
||||
// shift-by-immediate AdvSIMD instructions.
|
||||
uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
|
||||
void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; }
|
||||
|
||||
void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
|
||||
// Output the constant in little endian byte order.
|
||||
for (unsigned i = 0; i != Size; ++i) {
|
||||
EmitByte(Val & 255, OS);
|
||||
Val >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCSubtargetInfo &STI,
|
||||
MCContext &Ctx) {
|
||||
return new ARM64MCCodeEmitter(MCII, STI, Ctx);
|
||||
}
|
||||
|
||||
/// getMachineOpValue - Return binary encoding of operand. If the machine
|
||||
/// operand requires relocation, record the relocation and return zero.
|
||||
unsigned
|
||||
ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
if (MO.isReg())
|
||||
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
|
||||
else {
|
||||
assert(MO.isImm() && "did not expect relocated expression");
|
||||
return static_cast<unsigned>(MO.getImm());
|
||||
}
|
||||
|
||||
assert(0 && "Unable to encode MCOperand!");
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <uint32_t FixupKind>
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
unsigned BaseReg = MI.getOperand(OpIdx).getReg();
|
||||
BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg);
|
||||
|
||||
const MCOperand &MO = MI.getOperand(OpIdx + 1);
|
||||
uint32_t ImmVal = 0;
|
||||
|
||||
if (MO.isImm())
|
||||
ImmVal = static_cast<uint32_t>(MO.getImm());
|
||||
else {
|
||||
assert(MO.isExpr() && "unable to encode load/store imm operand");
|
||||
MCFixupKind Kind = MCFixupKind(FixupKind);
|
||||
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
|
||||
++MCNumFixups;
|
||||
}
|
||||
|
||||
return BaseReg | (ImmVal << 5);
|
||||
}
|
||||
|
||||
/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
|
||||
/// target.
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
|
||||
// If the destination is an immediate, we have nothing to do.
|
||||
if (MO.isImm())
|
||||
return MO.getImm();
|
||||
assert(MO.isExpr() && "Unexpected ADR target type!");
|
||||
const MCExpr *Expr = MO.getExpr();
|
||||
|
||||
MCFixupKind Kind = MI.getOpcode() == ARM64::ADR
|
||||
? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21)
|
||||
: MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21);
|
||||
Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
|
||||
|
||||
MCNumFixups += 1;
|
||||
|
||||
// All of the information is in the fixup.
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
|
||||
/// the 2-bit shift field. The shift field is stored in bits 12-13 of the
|
||||
/// return value.
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
// Suboperands are [imm, shifter].
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
|
||||
assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL &&
|
||||
"unexpected shift type for add/sub immediate");
|
||||
unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm());
|
||||
assert((ShiftVal == 0 || ShiftVal == 12) &&
|
||||
"unexpected shift value for add/sub immediate");
|
||||
if (MO.isImm())
|
||||
return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12));
|
||||
assert(MO.isExpr() && "Unable to encode MCOperand!");
|
||||
const MCExpr *Expr = MO.getExpr();
|
||||
assert(ShiftVal == 0 && "shift not allowed on add/sub immediate with fixup");
|
||||
|
||||
// Encode the 12 bits of the fixup.
|
||||
MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12);
|
||||
Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
|
||||
|
||||
++MCNumFixups;
|
||||
|
||||
return 0;
|
||||
}
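
// Example (illustrative): "add x0, x1, #20, lsl #12" returns 20 | (1 << 12)
// from the code above, whereas "add x0, x1, :lo12:var" records a
// fixup_arm64_add_imm12 and returns 0 so the fixup supplies the bits later.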
|
||||
|
||||
/// getCondBranchTargetOpValue - Return the encoded value for a conditional
|
||||
/// branch target.
|
||||
uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue(
|
||||
const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
|
||||
// If the destination is an immediate, we have nothing to do.
|
||||
if (MO.isImm())
|
||||
return MO.getImm();
|
||||
assert(MO.isExpr() && "Unexpected target type!");
|
||||
|
||||
MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_imm19);
|
||||
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
|
||||
|
||||
++MCNumFixups;
|
||||
|
||||
// All of the information is in the fixup.
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
|
||||
if (MO.isImm())
|
||||
return MO.getImm();
|
||||
assert(MO.isExpr() && "Unexpected movz/movk immediate");
|
||||
|
||||
Fixups.push_back(MCFixup::Create(
|
||||
0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc()));
|
||||
|
||||
++MCNumFixups;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
|
||||
/// branch target.
|
||||
uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue(
|
||||
const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
|
||||
// If the destination is an immediate, we have nothing to do.
|
||||
if (MO.isImm())
|
||||
return MO.getImm();
|
||||
assert(MO.isExpr() && "Unexpected ADR target type!");
|
||||
|
||||
MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14);
|
||||
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
|
||||
|
||||
++MCNumFixups;
|
||||
|
||||
// All of the information is in the fixup.
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getBranchTargetOpValue - Return the encoded value for an unconditional
|
||||
/// branch target.
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
|
||||
// If the destination is an immediate, we have nothing to do.
|
||||
if (MO.isImm())
|
||||
return MO.getImm();
|
||||
assert(MO.isExpr() && "Unexpected ADR target type!");
|
||||
|
||||
MCFixupKind Kind = MI.getOpcode() == ARM64::BL
|
||||
? MCFixupKind(ARM64::fixup_arm64_pcrel_call26)
|
||||
: MCFixupKind(ARM64::fixup_arm64_pcrel_branch26);
|
||||
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
|
||||
|
||||
++MCNumFixups;
|
||||
|
||||
// All of the information is in the fixup.
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getVecShifterOpValue - Return the encoded value for the vector shifter:
|
||||
///
|
||||
/// 00 -> 0
|
||||
/// 01 -> 8
|
||||
/// 10 -> 16
|
||||
/// 11 -> 24
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
|
||||
|
||||
switch (MO.getImm()) {
|
||||
default:
|
||||
break;
|
||||
case 0:
|
||||
return 0;
|
||||
case 8:
|
||||
return 1;
|
||||
case 16:
|
||||
return 2;
|
||||
case 24:
|
||||
return 3;
|
||||
}
|
||||
|
||||
assert(false && "Invalid value for vector shift amount!");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
|
||||
return 64 - (MO.getImm());
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
|
||||
return 64 - (MO.getImm() | 32);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
|
||||
return 32 - (MO.getImm() | 16);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the shift amount!");
|
||||
return 16 - (MO.getImm() | 8);
|
||||
}
|
||||
|
||||
/// getFixedPointScaleOpValue - Return the encoded value for the
|
||||
// FP-to-fixed-point scale factor.
|
||||
uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue(
|
||||
const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return 64 - MO.getImm();
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return 64 - MO.getImm();
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return 32 - MO.getImm();
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return 16 - MO.getImm();
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return 8 - MO.getImm();
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return MO.getImm() - 64;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return MO.getImm() - 32;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return MO.getImm() - 16;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() && "Expected an immediate value for the scale amount!");
|
||||
return MO.getImm() - 8;
|
||||
}
|
||||
|
||||
/// getMoveVecShifterOpValue - Return the encoded value for the vector move
|
||||
/// shifter (MSL).
|
||||
uint32_t
|
||||
ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
assert(MO.isImm() &&
|
||||
"Expected an immediate value for the move shift amount!");
|
||||
unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm());
|
||||
assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!");
|
||||
return ShiftVal == 8 ? 0 : 1;
|
||||
}
|
||||
|
||||
unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
// If one of the signed fixup kinds is applied to a MOVZ instruction, the
|
||||
// eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
|
||||
// job to ensure that any bits possibly affected by this are 0. This means we
|
||||
// must zero out bit 30 (essentially emitting a MOVN).
|
||||
MCOperand UImm16MO = MI.getOperand(1);
|
||||
|
||||
// Nothing to do if there's no fixup.
|
||||
if (UImm16MO.isImm())
|
||||
return EncodedValue;
|
||||
|
||||
return EncodedValue & ~(1u << 30);
|
||||
}
|
||||
|
||||
void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
const MCSubtargetInfo &STI) const {
|
||||
if (MI.getOpcode() == ARM64::TLSDESCCALL) {
|
||||
// This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
|
||||
// following (BLR) instruction. It doesn't emit any code itself so it
|
||||
// doesn't go through the normal TableGenerated channels.
|
||||
MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call);
|
||||
Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup));
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
|
||||
EmitConstant(Binary, 4, OS);
|
||||
++MCNumEmitted; // Keep track of the # of mi's emitted.
|
||||
}
|
||||
|
||||
#include "ARM64GenMCCodeEmitter.inc"
168
lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp
Normal file
@ -0,0 +1,168 @@
|
||||
//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the implementation of the assembly expression modifiers
|
||||
// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "aarch64symbolrefexpr"
|
||||
#include "ARM64MCExpr.h"
|
||||
#include "llvm/MC/MCAssembler.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCELF.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/MC/MCValue.h"
|
||||
#include "llvm/Object/ELF.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind,
|
||||
MCContext &Ctx) {
|
||||
return new (Ctx) ARM64MCExpr(Expr, Kind);
|
||||
}
|
||||
|
||||
StringRef ARM64MCExpr::getVariantKindName() const {
|
||||
switch (static_cast<uint32_t>(getKind())) {
|
||||
case VK_CALL: return "";
|
||||
case VK_LO12: return ":lo12:";
|
||||
case VK_ABS_G3: return ":abs_g3:";
|
||||
case VK_ABS_G2: return ":abs_g2:";
|
||||
case VK_ABS_G2_NC: return ":abs_g2_nc:";
|
||||
case VK_ABS_G1: return ":abs_g1:";
|
||||
case VK_ABS_G1_NC: return ":abs_g1_nc:";
|
||||
case VK_ABS_G0: return ":abs_g0:";
|
||||
case VK_ABS_G0_NC: return ":abs_g0_nc:";
|
||||
case VK_DTPREL_G2: return ":dtprel_g2:";
|
||||
case VK_DTPREL_G1: return ":dtprel_g1:";
|
||||
case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:";
|
||||
case VK_DTPREL_G0: return ":dtprel_g0:";
|
||||
case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:";
|
||||
case VK_DTPREL_LO12: return ":dtprel_lo12:";
|
||||
case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:";
|
||||
case VK_TPREL_G2: return ":tprel_g2:";
|
||||
case VK_TPREL_G1: return ":tprel_g1:";
|
||||
case VK_TPREL_G1_NC: return ":tprel_g1_nc:";
|
||||
case VK_TPREL_G0: return ":tprel_g0:";
|
||||
case VK_TPREL_G0_NC: return ":tprel_g0_nc:";
|
||||
case VK_TPREL_LO12: return ":tprel_lo12:";
|
||||
case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:";
|
||||
case VK_TLSDESC_LO12: return ":tlsdesc_lo12:";
|
||||
case VK_ABS_PAGE: return "";
|
||||
case VK_GOT_PAGE: return ":got:";
|
||||
case VK_GOT_LO12: return ":got_lo12:";
|
||||
case VK_GOTTPREL_PAGE: return ":gottprel:";
|
||||
case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:";
|
||||
case VK_GOTTPREL_G1: return ":gottprel_g1:";
|
||||
case VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:";
|
||||
case VK_TLSDESC: return "";
|
||||
case VK_TLSDESC_PAGE: return ":tlsdesc:";
|
||||
default:
|
||||
llvm_unreachable("Invalid ELF symbol kind");
|
||||
}
|
||||
}
|
||||
|
||||
void ARM64MCExpr::PrintImpl(raw_ostream &OS) const {
|
||||
if (getKind() != VK_NONE)
|
||||
OS << getVariantKindName();
|
||||
OS << *Expr;
|
||||
}
|
||||
|
||||
// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
|
||||
// that method should be made public?
|
||||
// FIXME: really do above: now that two backends are using it.
|
||||
static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
  switch (Value->getKind()) {
  case MCExpr::Target:
    llvm_unreachable("Can't handle nested target expr!");
    break;

  case MCExpr::Constant:
    break;

  case MCExpr::Binary: {
    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
    AddValueSymbolsImpl(BE->getLHS(), Asm);
    AddValueSymbolsImpl(BE->getRHS(), Asm);
    break;
  }

  case MCExpr::SymbolRef:
    Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
    break;

  case MCExpr::Unary:
    AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
    break;
  }
}

void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
  AddValueSymbolsImpl(getSubExpr(), Asm);
}

const MCSection *ARM64MCExpr::FindAssociatedSection() const {
  llvm_unreachable("FIXME: what goes here?");
}

bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
                                            const MCAsmLayout *Layout) const {
  if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout))
    return false;

  Res =
      MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());

  return true;
}

static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
  switch (Expr->getKind()) {
  case MCExpr::Target:
    llvm_unreachable("Can't handle nested target expression");
    break;
  case MCExpr::Constant:
    break;

  case MCExpr::Binary: {
    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
    fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
    fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
    break;
  }

  case MCExpr::SymbolRef: {
    // We're known to be under a TLS fixup, so any symbol should be
    // modified. There should be only one.
    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
    MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
    MCELF::SetType(SD, ELF::STT_TLS);
    break;
  }

  case MCExpr::Unary:
    fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
    break;
  }
}

void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
  switch (getSymbolLoc(Kind)) {
  default:
    return;
  case VK_DTPREL:
  case VK_GOTTPREL:
  case VK_TPREL:
  case VK_TLSDESC:
    break;
  }

  fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}
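For orientation only, here is a minimal sketch (not part of the diff above) of how a client could wrap a symbol reference in one of these target expressions and print it; it assumes an already-initialized MCContext named Ctx, the declarations from ARM64MCExpr.h, and the MC API spellings of this LLVM revision (GetOrCreateSymbol, Create).

#include "ARM64MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: build and print ":lo12:var" using an existing context.
static void printLo12Example(MCContext &Ctx) {
  MCSymbol *Var = Ctx.GetOrCreateSymbol("var");
  const MCExpr *Ref = MCSymbolRefExpr::Create(Var, Ctx);
  const ARM64MCExpr *Lo12 = ARM64MCExpr::Create(Ref, ARM64MCExpr::VK_LO12, Ctx);
  // PrintImpl prepends getVariantKindName(), so this writes ":lo12:var".
  errs() << *Lo12 << "\n";
}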
162 lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h Normal file
@ -0,0 +1,162 @@
//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes ARM64-specific MCExprs, used for modifiers like
// ":lo12:" or ":gottprel_g1:".
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ARM64MCEXPR_H
#define LLVM_ARM64MCEXPR_H

#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"

namespace llvm {

class ARM64MCExpr : public MCTargetExpr {
public:
  enum VariantKind {
    VK_NONE = 0x000,

    // Symbol locations specifying (roughly speaking) what calculation should be
    // performed to construct the final address for the relocated
    // symbol. E.g. direct, via the GOT, ...
    VK_ABS = 0x001,
    VK_SABS = 0x002,
    VK_GOT = 0x003,
    VK_DTPREL = 0x004,
    VK_GOTTPREL = 0x005,
    VK_TPREL = 0x006,
    VK_TLSDESC = 0x007,
    VK_SymLocBits = 0x00f,

    // Variants specifying which part of the final address calculation is
    // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a
    // MOVZ/MOVK.
    VK_PAGE = 0x010,
    VK_PAGEOFF = 0x020,
    VK_G0 = 0x030,
    VK_G1 = 0x040,
    VK_G2 = 0x050,
    VK_G3 = 0x060,
    VK_AddressFragBits = 0x0f0,

    // Whether the final relocation is a checked one (where a linker should
    // perform a range-check on the final address) or not. Note that this field
    // is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12:
    // on its own is a non-checked relocation. We side with ELF on being
    // explicit about this!
    VK_NC = 0x100,

    // Convenience definitions for referring to specific textual representations
    // of relocation specifiers. Note that this means the "_NC" is sometimes
    // omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC
    // since a user would write ":lo12:").
    VK_CALL = VK_ABS,
    VK_ABS_PAGE = VK_ABS | VK_PAGE,
    VK_ABS_G3 = VK_ABS | VK_G3,
    VK_ABS_G2 = VK_ABS | VK_G2,
    VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC,
    VK_ABS_G1 = VK_ABS | VK_G1,
    VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC,
    VK_ABS_G0 = VK_ABS | VK_G0,
    VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC,
    VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC,
    VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC,
    VK_GOT_PAGE = VK_GOT | VK_PAGE,
    VK_DTPREL_G2 = VK_DTPREL | VK_G2,
    VK_DTPREL_G1 = VK_DTPREL | VK_G1,
    VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC,
    VK_DTPREL_G0 = VK_DTPREL | VK_G0,
    VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC,
    VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF,
    VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC,
    VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE,
    VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC,
    VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1,
    VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC,
    VK_TPREL_G2 = VK_TPREL | VK_G2,
    VK_TPREL_G1 = VK_TPREL | VK_G1,
    VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC,
    VK_TPREL_G0 = VK_TPREL | VK_G0,
    VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC,
    VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF,
    VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC,
    VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC,
    VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE,

    VK_INVALID = 0xfff
  };

private:
  const MCExpr *Expr;
  const VariantKind Kind;

  explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind)
      : Expr(Expr), Kind(Kind) {}

public:
  /// @name Construction
  /// @{

  static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind,
                                   MCContext &Ctx);

  /// @}
  /// @name Accessors
  /// @{

  /// Get the kind of this expression.
  VariantKind getKind() const { return static_cast<VariantKind>(Kind); }

  /// Get the expression this modifier applies to.
  const MCExpr *getSubExpr() const { return Expr; }

  /// @}
  /// @name VariantKind information extractors.
  /// @{

  static VariantKind getSymbolLoc(VariantKind Kind) {
    return static_cast<VariantKind>(Kind & VK_SymLocBits);
  }

  static VariantKind getAddressFrag(VariantKind Kind) {
    return static_cast<VariantKind>(Kind & VK_AddressFragBits);
  }

  static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; }

  /// @}

  /// Convert the variant kind into an ELF-appropriate modifier
  /// (e.g. ":got:", ":lo12:").
  StringRef getVariantKindName() const;

  void PrintImpl(raw_ostream &OS) const;

  void AddValueSymbols(MCAssembler *) const;

  const MCSection *FindAssociatedSection() const;

  bool EvaluateAsRelocatableImpl(MCValue &Res,
                                 const MCAsmLayout *Layout) const;

  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;

  static bool classof(const MCExpr *E) {
    return E->getKind() == MCExpr::Target;
  }

  static bool classof(const ARM64MCExpr *) { return true; }

};
} // end namespace llvm

#endif
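A minimal sketch (not part of the commit) of how the bit-field encoding above decomposes; it assumes nothing beyond the declarations in this header.

#include "ARM64MCExpr.h"
#include <cassert>
using namespace llvm;

// ":dtprel_g1_nc:" = DTPREL symbol location | G1 address fragment | NC bit.
static void checkKindEncoding() {
  const ARM64MCExpr::VariantKind K = ARM64MCExpr::VK_DTPREL_G1_NC;
  assert(ARM64MCExpr::getSymbolLoc(K) == ARM64MCExpr::VK_DTPREL);
  assert(ARM64MCExpr::getAddressFrag(K) == ARM64MCExpr::VK_G1);
  assert(ARM64MCExpr::isNotChecked(K));
}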
167 lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp Normal file
@ -0,0 +1,167 @@
//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides ARM64 specific target descriptions.
//
//===----------------------------------------------------------------------===//

#include "ARM64MCTargetDesc.h"
#include "ARM64ELFStreamer.h"
#include "ARM64MCAsmInfo.h"
#include "InstPrinter/ARM64InstPrinter.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

#define GET_INSTRINFO_MC_DESC
#include "ARM64GenInstrInfo.inc"

#define GET_SUBTARGETINFO_MC_DESC
#include "ARM64GenSubtargetInfo.inc"

#define GET_REGINFO_MC_DESC
#include "ARM64GenRegisterInfo.inc"

using namespace llvm;

static MCInstrInfo *createARM64MCInstrInfo() {
  MCInstrInfo *X = new MCInstrInfo();
  InitARM64MCInstrInfo(X);
  return X;
}

static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU,
                                                   StringRef FS) {
  MCSubtargetInfo *X = new MCSubtargetInfo();
  InitARM64MCSubtargetInfo(X, TT, CPU, FS);
  return X;
}

static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) {
  MCRegisterInfo *X = new MCRegisterInfo();
  InitARM64MCRegisterInfo(X, ARM64::LR);
  return X;
}

static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI,
                                       StringRef TT) {
  Triple TheTriple(TT);

  MCAsmInfo *MAI;
  if (TheTriple.isOSDarwin())
    MAI = new ARM64MCAsmInfoDarwin();
  else {
    assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
    MAI = new ARM64MCAsmInfoELF();
  }

  // Initial state of the frame pointer is SP.
  unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true);
  MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0);
  MAI->addInitialFrameState(Inst);

  return MAI;
}

MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                        CodeModel::Model CM,
                                        CodeGenOpt::Level OL) {
  Triple TheTriple(TT);
  assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) &&
         "Only expect Darwin and ELF targets");

  if (CM == CodeModel::Default)
    CM = CodeModel::Small;
  // The default MCJIT memory managers make no guarantees about where they can
  // find an executable page; JITed code needs to be able to refer to globals
  // no matter how far away they are.
  else if (CM == CodeModel::JITDefault)
    CM = CodeModel::Large;
  else if (CM != CodeModel::Small && CM != CodeModel::Large)
    report_fatal_error("Only small and large code models are allowed on ARM64");

  // ARM64 Darwin is always PIC.
  if (TheTriple.isOSDarwin())
    RM = Reloc::PIC_;
  // On ELF platforms the default static relocation model has a smart enough
  // linker to cope with referencing external symbols defined in a shared
  // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
  else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
    RM = Reloc::Static;

  MCCodeGenInfo *X = new MCCodeGenInfo();
  X->InitMCCodeGenInfo(RM, CM, OL);
  return X;
}

static MCInstPrinter *createARM64MCInstPrinter(const Target &T,
                                               unsigned SyntaxVariant,
                                               const MCAsmInfo &MAI,
                                               const MCInstrInfo &MII,
                                               const MCRegisterInfo &MRI,
                                               const MCSubtargetInfo &STI) {
  if (SyntaxVariant == 0)
    return new ARM64InstPrinter(MAI, MII, MRI, STI);
  if (SyntaxVariant == 1)
    return new ARM64AppleInstPrinter(MAI, MII, MRI, STI);

  return 0;
}

static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
                                    MCContext &Ctx, MCAsmBackend &TAB,
                                    raw_ostream &OS, MCCodeEmitter *Emitter,
                                    const MCSubtargetInfo &STI, bool RelaxAll,
                                    bool NoExecStack) {
  Triple TheTriple(TT);

  if (TheTriple.isOSDarwin())
    return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
                               /*LabelSections*/ true);

  return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
}

// Force static initialization.
extern "C" void LLVMInitializeARM64TargetMC() {
  // Register the MC asm info.
  RegisterMCAsmInfoFn X(TheARM64Target, createARM64MCAsmInfo);

  // Register the MC codegen info.
  TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
                                        createARM64MCCodeGenInfo);

  // Register the MC instruction info.
  TargetRegistry::RegisterMCInstrInfo(TheARM64Target, createARM64MCInstrInfo);

  // Register the MC register info.
  TargetRegistry::RegisterMCRegInfo(TheARM64Target, createARM64MCRegisterInfo);

  // Register the MC subtarget info.
  TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
                                          createARM64MCSubtargetInfo);

  // Register the asm backend.
  TargetRegistry::RegisterMCAsmBackend(TheARM64Target, createARM64AsmBackend);

  // Register the MC Code Emitter
  TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
                                        createARM64MCCodeEmitter);

  // Register the object streamer.
  TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);

  // Register the MCInstPrinter.
  TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
                                        createARM64MCInstPrinter);
}
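For context, a rough sketch (not from this commit) of how a tool would consume these registrations once they have run. The triple string is only an example input, LLVMInitializeARM64TargetInfo is assumed to come from the TargetInfo library elsewhere in this patch, and the Target factory calls follow the registry API of this LLVM revision.

#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/TargetRegistry.h"
#include <memory>
#include <string>

extern "C" void LLVMInitializeARM64TargetInfo(); // assumed, from TargetInfo/
extern "C" void LLVMInitializeARM64TargetMC();

// Hypothetical helper: look up the "arm64" target and build MC-level objects.
static bool queryARM64MC() {
  LLVMInitializeARM64TargetInfo();
  LLVMInitializeARM64TargetMC();

  std::string Err;
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget("arm64-apple-ios", Err);
  if (!T)
    return false; // target not linked into this tool

  // The factory functions registered above are reached through the Target.
  std::unique_ptr<llvm::MCRegisterInfo> MRI(
      T->createMCRegInfo("arm64-apple-ios"));
  std::unique_ptr<llvm::MCAsmInfo> MAI(
      T->createMCAsmInfo(*MRI, "arm64-apple-ios"));
  return MRI && MAI;
}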
62 lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h Normal file
@ -0,0 +1,62 @@
//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides ARM64 specific target descriptions.
//
//===----------------------------------------------------------------------===//

#ifndef ARM64MCTARGETDESC_H
#define ARM64MCTARGETDESC_H

#include "llvm/Support/DataTypes.h"
#include <string>

namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCRegisterInfo;
class MCObjectWriter;
class MCSubtargetInfo;
class StringRef;
class Target;
class raw_ostream;

extern Target TheARM64Target;

MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII,
                                        const MCRegisterInfo &MRI,
                                        const MCSubtargetInfo &STI,
                                        MCContext &Ctx);
MCAsmBackend *createARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                    StringRef TT, StringRef CPU);

MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI);

MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
                                            uint32_t CPUSubtype);

} // End llvm namespace

// Defines symbolic names for ARM64 registers. This defines a mapping from
// register name to register number.
//
#define GET_REGINFO_ENUM
#include "ARM64GenRegisterInfo.inc"

// Defines symbolic names for the ARM64 instructions.
//
#define GET_INSTRINFO_ENUM
#include "ARM64GenInstrInfo.inc"

#define GET_SUBTARGETINFO_ENUM
#include "ARM64GenSubtargetInfo.inc"

#endif
396 lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp Normal file
@ -0,0 +1,396 @@
//===-- ARM64MachObjectWriter.cpp - ARM64 Mach Object Writer --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/ARM64FixupKinds.h"
#include "MCTargetDesc/ARM64MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;

namespace {
class ARM64MachObjectWriter : public MCMachObjectTargetWriter {
  bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType,
                                  const MCSymbolRefExpr *Sym,
                                  unsigned &Log2Size, const MCAssembler &Asm);

public:
  ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
      : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
                                 /*UseAggressiveSymbolFolding=*/true) {}

  void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
                        const MCAsmLayout &Layout, const MCFragment *Fragment,
                        const MCFixup &Fixup, MCValue Target,
                        uint64_t &FixedValue);
};
}

bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo(
    const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
    unsigned &Log2Size, const MCAssembler &Asm) {
  RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED);
  Log2Size = ~0U;

  switch ((unsigned)Fixup.getKind()) {
  default:
    return false;

  case FK_Data_1:
    Log2Size = llvm::Log2_32(1);
    return true;
  case FK_Data_2:
    Log2Size = llvm::Log2_32(2);
    return true;
  case FK_Data_4:
    Log2Size = llvm::Log2_32(4);
    if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
      RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
    return true;
  case FK_Data_8:
    Log2Size = llvm::Log2_32(8);
    if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
      RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
    return true;
  case ARM64::fixup_arm64_add_imm12:
  case ARM64::fixup_arm64_ldst_imm12_scale1:
  case ARM64::fixup_arm64_ldst_imm12_scale2:
  case ARM64::fixup_arm64_ldst_imm12_scale4:
  case ARM64::fixup_arm64_ldst_imm12_scale8:
  case ARM64::fixup_arm64_ldst_imm12_scale16:
    Log2Size = llvm::Log2_32(4);
    switch (Sym->getKind()) {
    default:
      assert(0 && "Unexpected symbol reference variant kind!");
    case MCSymbolRefExpr::VK_PAGEOFF:
      RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
      return true;
    case MCSymbolRefExpr::VK_GOTPAGEOFF:
      RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12);
      return true;
    case MCSymbolRefExpr::VK_TLVPPAGEOFF:
      RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12);
      return true;
    }
  case ARM64::fixup_arm64_pcrel_adrp_imm21:
    Log2Size = llvm::Log2_32(4);
    // This encompasses the relocation for the whole 21-bit value.
    switch (Sym->getKind()) {
    default:
      Asm.getContext().FatalError(Fixup.getLoc(),
                                  "ADR/ADRP relocations must be GOT relative");
    case MCSymbolRefExpr::VK_PAGE:
      RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
      return true;
    case MCSymbolRefExpr::VK_GOTPAGE:
      RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGE21);
      return true;
    case MCSymbolRefExpr::VK_TLVPPAGE:
      RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGE21);
      return true;
    }
    return true;
  case ARM64::fixup_arm64_pcrel_branch26:
  case ARM64::fixup_arm64_pcrel_call26:
    Log2Size = llvm::Log2_32(4);
    RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26);
    return true;
  }
}

void ARM64MachObjectWriter::RecordRelocation(
    MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
    const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
    uint64_t &FixedValue) {
  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());

  // See <reloc.h>.
  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment);
  unsigned Log2Size = 0;
  int64_t Value = 0;
  unsigned Index = 0;
  unsigned IsExtern = 0;
  unsigned Type = 0;
  unsigned Kind = Fixup.getKind();

  FixupOffset += Fixup.getOffset();

  // ARM64 pcrel relocation addends do not include the section offset.
  if (IsPCRel)
    FixedValue += FixupOffset;
  // ADRP fixups use relocations for the whole symbol value and only
  // put the addend in the instruction itself. Clear out any value the
  // generic code figured out from the symbol definition.
  if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21 ||
      Kind == ARM64::fixup_arm64_pcrel_imm19)
    FixedValue = 0;

  // imm19 relocations are for conditional branches, which require
  // assembler local symbols. If we got here, that's not what we have,
  // so complain loudly.
  if (Kind == ARM64::fixup_arm64_pcrel_imm19) {
    Asm.getContext().FatalError(Fixup.getLoc(),
                                "conditional branch requires assembler-local"
                                " label. '" +
                                    Target.getSymA()->getSymbol().getName() +
                                    "' is external.");
    return;
  }

  // 14-bit branch relocations should only target internal labels, and so
  // should never get here.
  if (Kind == ARM64::fixup_arm64_pcrel_branch14) {
    Asm.getContext().FatalError(Fixup.getLoc(),
                                "Invalid relocation on conditional branch!");
    return;
  }

  if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
                                  Asm)) {
    Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!");
    return;
  }

  Value = Target.getConstant();

  if (Target.isAbsolute()) { // constant
    // FIXME: Should this always be extern?
    // SymbolNum of 0 indicates the absolute section.
    Type = MachO::ARM64_RELOC_UNSIGNED;
    Index = 0;

    if (IsPCRel) {
      IsExtern = 1;
      Asm.getContext().FatalError(Fixup.getLoc(),
                                  "PC relative absolute relocation!");

      // FIXME: x86_64 sets the type to a branch reloc here. Should we do
      // something similar?
    }
  } else if (Target.getSymB()) { // A - B + constant
    const MCSymbol *A = &Target.getSymA()->getSymbol();
    MCSymbolData &A_SD = Asm.getSymbolData(*A);
    const MCSymbolData *A_Base = Asm.getAtom(&A_SD);

    const MCSymbol *B = &Target.getSymB()->getSymbol();
    MCSymbolData &B_SD = Asm.getSymbolData(*B);
    const MCSymbolData *B_Base = Asm.getAtom(&B_SD);

    // Check for "_foo@got - .", which comes through here as:
    // Ltmp0:
    //  ... _foo@got - Ltmp0
    if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT &&
        Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None &&
        Layout.getSymbolOffset(&B_SD) ==
            Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
      // SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
      Index = A_Base->getIndex();
      IsExtern = 1;
      Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
      IsPCRel = 1;
      MachO::any_relocation_info MRE;
      MRE.r_word0 = FixupOffset;
      MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                     (IsExtern << 27) | (Type << 28));
      Writer->addRelocation(Fragment->getParent(), MRE);
      return;
    } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
               Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
      // Otherwise, neither symbol can be modified.
      Asm.getContext().FatalError(Fixup.getLoc(),
                                  "unsupported relocation of modified symbol");

    // We don't support PCrel relocations of differences.
    if (IsPCRel)
      Asm.getContext().FatalError(Fixup.getLoc(),
                                  "unsupported pc-relative relocation of "
                                  "difference");
    // ARM64 always uses external relocations. If there is no symbol to use as
    // a base address (a local symbol with no preceding non-local symbol),
    // error out.
    //
    // FIXME: We should probably just synthesize an external symbol and use
    // that.
    if (!A_Base)
      Asm.getContext().FatalError(
          Fixup.getLoc(),
          "unsupported relocation of local symbol '" + A->getName() +
              "'. Must have non-local symbol earlier in section.");
    if (!B_Base)
      Asm.getContext().FatalError(
          Fixup.getLoc(),
          "unsupported relocation of local symbol '" + B->getName() +
              "'. Must have non-local symbol earlier in section.");

    if (A_Base == B_Base && A_Base)
      Asm.getContext().FatalError(Fixup.getLoc(),
                                  "unsupported relocation with identical base");

    Value += (A_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
                                                   &A_SD, Layout)) -
             (A_Base == NULL || A_Base->getFragment() == NULL
                  ? 0
                  : Writer->getSymbolAddress(A_Base, Layout));
    Value -= (B_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
                                                   &B_SD, Layout)) -
             (B_Base == NULL || B_Base->getFragment() == NULL
                  ? 0
                  : Writer->getSymbolAddress(B_Base, Layout));

    Index = A_Base->getIndex();
    IsExtern = 1;
    Type = MachO::ARM64_RELOC_UNSIGNED;

    MachO::any_relocation_info MRE;
    MRE.r_word0 = FixupOffset;
    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                   (IsExtern << 27) | (Type << 28));
    Writer->addRelocation(Fragment->getParent(), MRE);

    Index = B_Base->getIndex();
    IsExtern = 1;
    Type = MachO::ARM64_RELOC_SUBTRACTOR;
  } else { // A + constant
    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
    MCSymbolData &SD = Asm.getSymbolData(*Symbol);
    const MCSymbolData *Base = Asm.getAtom(&SD);
    const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
        Fragment->getParent()->getSection());
    // If the symbol is a variable and we weren't able to get a Base for it
    // (i.e., it's not in the symbol table associated with a section), resolve
    // the relocation based on its expansion instead.
    if (Symbol->isVariable() && !Base) {
      // If the evaluation is an absolute value, just use that directly
      // to keep things easy.
      int64_t Res;
      if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
              Res, Layout, Writer->getSectionAddressMap())) {
        FixedValue = Res;
        return;
      }

      // FIXME: Will the Target we already have ever have any data in it
      // we need to preserve and merge with the new Target? How about
      // the FixedValue?
      if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout))
        Asm.getContext().FatalError(Fixup.getLoc(),
                                    "unable to resolve variable '" +
                                        Symbol->getName() + "'");
      return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
                              FixedValue);
    }

    // Relocations inside debug sections always use local relocations when
    // possible. This seems to be done because the debugger doesn't fully
    // understand relocation entries and expects to find values that
    // have already been fixed up.
    if (Symbol->isInSection()) {
      if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
        Base = 0;
    }

    // ARM64 uses external relocations as much as possible. For debug sections,
    // and for pointer-sized relocations (.quad), we allow section relocations.
    // It's code sections that run into trouble.
    if (Base) {
      Index = Base->getIndex();
      IsExtern = 1;

      // Add the local offset, if needed.
      if (Base != &SD)
        Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
    } else if (Symbol->isInSection()) {
      // Pointer-sized relocations can use a local relocation. Otherwise,
      // we have to be in a debug info section.
      if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
        Asm.getContext().FatalError(
            Fixup.getLoc(),
            "unsupported relocation of local symbol '" + Symbol->getName() +
                "'. Must have non-local symbol earlier in section.");
      // Adjust the relocation to be section-relative.
      // The index is the section ordinal (1-based).
      const MCSectionData &SymSD =
          Asm.getSectionData(SD.getSymbol().getSection());
      Index = SymSD.getOrdinal() + 1;
      IsExtern = 0;
      Value += Writer->getSymbolAddress(&SD, Layout);

      if (IsPCRel)
        Value -= Writer->getFragmentAddress(Fragment, Layout) +
                 Fixup.getOffset() + (1 << Log2Size);
    } else {
      // Resolve constant variables.
      if (SD.getSymbol().isVariable()) {
        int64_t Res;
        if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
                Res, Layout, Writer->getSectionAddressMap())) {
          FixedValue = Res;
          return;
        }
      }
      Asm.getContext().FatalError(Fixup.getLoc(),
                                  "unsupported relocation of variable '" +
                                      Symbol->getName() + "'");
    }
  }

  // If the relocation kind is Branch26, Page21, or Pageoff12, any addend
  // is represented via an Addend relocation, not encoded directly into
  // the instruction.
  if ((Type == MachO::ARM64_RELOC_BRANCH26 ||
       Type == MachO::ARM64_RELOC_PAGE21 ||
       Type == MachO::ARM64_RELOC_PAGEOFF12) &&
      Value) {
    assert((Value & 0xff000000) == 0 && "Added relocation out of range!");

    MachO::any_relocation_info MRE;
    MRE.r_word0 = FixupOffset;
    MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                   (IsExtern << 27) | (Type << 28));
    Writer->addRelocation(Fragment->getParent(), MRE);

    // Now set up the Addend relocation.
    Type = MachO::ARM64_RELOC_ADDEND;
    Index = Value;
    IsPCRel = 0;
    Log2Size = 2;
    IsExtern = 0;

    // Put zero into the instruction itself. The addend is in the relocation.
    Value = 0;
  }

  // If there's any addend left to handle, encode it in the instruction.
  FixedValue = Value;

  // struct relocation_info (8 bytes)
  MachO::any_relocation_info MRE;
  MRE.r_word0 = FixupOffset;
  MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
                 (IsExtern << 27) | (Type << 28));
  Writer->addRelocation(Fragment->getParent(), MRE);
}

MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS,
                                                  uint32_t CPUType,
                                                  uint32_t CPUSubtype) {
  return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype),
                                OS, /*IsLittleEndian=*/true);
}
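As a side note, a small sketch (not part of the commit) of the r_word1 packing used repeatedly above; the field widths simply mirror the shifts in RecordRelocation, and the example values below are arbitrary.

#include <cstdint>

// 24-bit symbol/section index, 1-bit pcrel flag, 2-bit log2(size),
// 1-bit extern flag, 4-bit relocation type -- the same shifts as above.
static uint32_t packRelocWord1(uint32_t Index, bool IsPCRel, uint32_t Log2Size,
                               bool IsExtern, uint32_t Type) {
  return (Index << 0) | (uint32_t(IsPCRel) << 24) | (Log2Size << 25) |
         (uint32_t(IsExtern) << 27) | (Type << 28);
}

// Example: Index = 5, IsPCRel = true, Log2Size = 2 (4-byte instruction),
// IsExtern = true, Type = 2 packs to 0x2D000005.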
14 lib/Target/ARM64/MCTargetDesc/CMakeLists.txt Normal file
@ -0,0 +1,14 @@
add_llvm_library(LLVMARM64Desc
  ARM64AsmBackend.cpp
  ARM64ELFObjectWriter.cpp
  ARM64ELFStreamer.cpp
  ARM64MCAsmInfo.cpp
  ARM64MCCodeEmitter.cpp
  ARM64MCExpr.cpp
  ARM64MCTargetDesc.cpp
  ARM64MachObjectWriter.cpp
)
add_dependencies(LLVMARM64Desc ARM64CommonTableGen)

# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
24 lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt Normal file
@ -0,0 +1,24 @@
;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = ARM64Desc
parent = ARM64
required_libraries = ARM64AsmPrinter ARM64Info MC Support
add_to_library_groups = ARM64
16 lib/Target/ARM64/MCTargetDesc/Makefile Normal file
@ -0,0 +1,16 @@
##===- lib/Target/ARM64/TargetDesc/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

LEVEL = ../../../..
LIBRARYNAME = LLVMARM64Desc

# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
25 lib/Target/ARM64/Makefile Normal file
@ -0,0 +1,25 @@
##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

LEVEL = ../../..
LIBRARYNAME = LLVMARM64CodeGen
TARGET = ARM64

# Make sure that tblgen is run, first thing.
BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \
                ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \
                ARM64GenDAGISel.inc \
                ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \
                ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \
                ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \
                ARM64GenMCPseudoLowering.inc

DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc

include $(LEVEL)/Makefile.common
Some files were not shown because too many files have changed in this diff.