mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-11 00:39:36 +00:00
ARM: add cyclone CPU with ZeroCycleZeroing feature.
The Cyclone CPU is similar to Swift for most LLVM purposes, but it has two preferred instructions for zeroing a VFP register. This commit teaches LLVM about them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205309 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
279edf967e
commit
c077472250
lib/Target/ARM
test/CodeGen/ARM
@ -73,6 +73,11 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
|
||||
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
|
||||
"Enable support for CRC instructions">;
|
||||
|
||||
// Cyclone has preferred instructions for zeroing VFP registers, which can
|
||||
// execute in 0 cycles.
|
||||
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
|
||||
"Has zero-cycle zeroing instructions">;
|
||||
|
||||
// Some processors have FP multiply-accumulate instructions that don't
|
||||
// play nicely with other VFP / NEON instructions, and it's generally better
|
||||
// to just not use them.
|
||||
@ -361,6 +366,13 @@ def : ProcessorModel<"cortex-a15", CortexA9Model,
|
||||
FeatureDSPThumb2, FeatureHasRAS,
|
||||
FeatureAClass]>;
|
||||
|
||||
// FIXME: krait has currently the same Schedule model as A9
|
||||
def : ProcessorModel<"krait", CortexA9Model,
|
||||
[ProcKrait, HasV7Ops,
|
||||
FeatureNEON, FeatureDB,
|
||||
FeatureDSPThumb2, FeatureHasRAS,
|
||||
FeatureAClass]>;
|
||||
|
||||
// FIXME: R5 has currently the same ProcessorModel as A8.
|
||||
def : ProcessorModel<"cortex-r5", CortexA8Model,
|
||||
[ProcR5, HasV7Ops, FeatureDB,
|
||||
@ -395,12 +407,12 @@ def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
|
||||
FeatureDB, FeatureFPARMv8,
|
||||
FeatureNEON, FeatureDSPThumb2]>;
|
||||
|
||||
// FIXME: krait has currently the same Schedule model as A9
|
||||
def : ProcessorModel<"krait", CortexA9Model,
|
||||
[ProcKrait, HasV7Ops,
|
||||
FeatureNEON, FeatureDB,
|
||||
FeatureDSPThumb2, FeatureHasRAS,
|
||||
FeatureAClass]>;
|
||||
// Cyclone is very similar to swift
|
||||
def : ProcessorModel<"cyclone", SwiftModel,
|
||||
[ProcSwift, HasV8Ops, HasV7Ops,
|
||||
FeatureCrypto, FeatureFPARMv8,
|
||||
FeatureDB,FeatureDSPThumb2,
|
||||
FeatureHasRAS, FeatureZCZeroing]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File Description
|
||||
|
@ -244,6 +244,7 @@ def HasMP : Predicate<"Subtarget->hasMPExtension()">,
|
||||
def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
|
||||
AssemblerPredicate<"FeatureTrustZone",
|
||||
"TrustZone">;
|
||||
def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
|
||||
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
|
||||
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
|
||||
def IsThumb : Predicate<"Subtarget->isThumb()">,
|
||||
|
@ -5245,6 +5245,26 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
|
||||
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
|
||||
} // isReMaterializable
|
||||
|
||||
|
||||
// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
|
||||
// require zero cycles to execute so they should be used wherever possible for
|
||||
// setting a register to zero.
|
||||
|
||||
// Even without these pseudo-insts we would probably end up with the correct
|
||||
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
|
||||
// since they are sometimes rather expensive (in general).
|
||||
|
||||
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
|
||||
def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
|
||||
[(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
|
||||
(VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
|
||||
Requires<[HasZCZ]>;
|
||||
def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
|
||||
[(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
|
||||
(VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
|
||||
Requires<[HasZCZ]>;
|
||||
}
|
||||
|
||||
// VMOV : Vector Get Lane (move scalar to ARM core register)
|
||||
|
||||
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
|
||||
|
@ -134,6 +134,7 @@ void ARMSubtarget::initializeEnvironment() {
|
||||
HasTrustZone = false;
|
||||
HasCrypto = false;
|
||||
HasCRC = false;
|
||||
HasZeroCycleZeroing = false;
|
||||
AllowsUnalignedMem = false;
|
||||
Thumb2DSP = false;
|
||||
UseNaClTrap = false;
|
||||
|
@ -177,6 +177,10 @@ protected:
|
||||
/// HasCRC - if true, processor supports CRC instructions
|
||||
bool HasCRC;
|
||||
|
||||
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
|
||||
/// particularly effective at zeroing a VFP register.
|
||||
bool HasZeroCycleZeroing;
|
||||
|
||||
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
|
||||
/// accesses for some types. For details, see
|
||||
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
|
||||
@ -298,6 +302,7 @@ public:
|
||||
bool isFPOnlySP() const { return FPOnlySP; }
|
||||
bool hasPerfMon() const { return HasPerfMon; }
|
||||
bool hasTrustZone() const { return HasTrustZone; }
|
||||
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
|
||||
bool prefers32BitThumb() const { return Pref32BitThumb; }
|
||||
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
|
||||
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
|
||||
|
70
test/CodeGen/ARM/zero-cycle-zero.ll
Normal file
70
test/CodeGen/ARM/zero-cycle-zero.ll
Normal file
@ -0,0 +1,70 @@
|
||||
; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK-CYCLONE
|
||||
; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-SWIFT
|
||||
|
||||
declare arm_aapcs_vfpcc void @take_vec64(<2 x i32>)
|
||||
|
||||
define void @test_vec64() {
|
||||
; CHECK-CYCLONE-LABEL: test_vec64:
|
||||
; CHECK-SWIFT-LABEL: test_vec64:
|
||||
|
||||
call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
|
||||
call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
|
||||
; CHECK-CYCLONE-NOT: vmov.f64 d0,
|
||||
; CHECK-CYCLONE: vmov.i32 d0, #0
|
||||
; CHECK-CYCLONE: bl
|
||||
; CHECK-CYCLONE: vmov.i32 d0, #0
|
||||
; CHECK-CYCLONE: bl
|
||||
|
||||
; CHECK-SWIFT: vmov.f64 [[ZEROREG:d[0-9]+]],
|
||||
; CHECK-SWIFT: vmov.i32 [[ZEROREG]], #0
|
||||
; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
|
||||
; CHECK-SWIFT: bl
|
||||
; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
|
||||
; CHECK-SWIFT: bl
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
declare arm_aapcs_vfpcc void @take_vec128(<8 x i16>)
|
||||
|
||||
define void @test_vec128() {
|
||||
; CHECK-CYCLONE-LABEL: test_vec128:
|
||||
; CHECK-SWIFT-LABEL: test_vec128:
|
||||
|
||||
call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
|
||||
call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
|
||||
; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
|
||||
; CHECK-CYCLONE: vmov.i32 q0, #0
|
||||
; CHECK-CYCLONE: bl
|
||||
; CHECK-CYCLONE: vmov.i32 q0, #0
|
||||
; CHECK-CYCLONE: bl
|
||||
|
||||
; CHECK-SWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
|
||||
; CHECK-SWIFT: vmov.i32 [[ZEROREG:q[0-9]+]], #0
|
||||
; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
|
||||
; CHECK-SWIFT: bl
|
||||
; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
|
||||
; CHECK-SWIFT: bl
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; External callee for @test_i32. It is declared with the same calling
; convention (arm_aapcs_vfpcc) that the call sites in @test_i32 use: calling a
; function through a mismatched calling convention is undefined behaviour.
declare arm_aapcs_vfpcc void @take_i32(i32)
|
||||
|
||||
define void @test_i32() {
|
||||
; CHECK-CYCLONE-LABEL: test_i32:
|
||||
; CHECK-SWIFT-LABEL: test_i32:
|
||||
|
||||
call arm_aapcs_vfpcc void @take_i32(i32 0)
|
||||
call arm_aapcs_vfpcc void @take_i32(i32 0)
|
||||
; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
|
||||
; CHECK-CYCLONE: mov r0, #0
|
||||
; CHECK-CYCLONE: bl
|
||||
; CHECK-CYCLONE: mov r0, #0
|
||||
; CHECK-CYCLONE: bl
|
||||
|
||||
; It doesn't particularly matter what Swift does here, there isn't carefully
|
||||
; crafted behaviour that we might break in Cyclone.
|
||||
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user