Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-01-01 00:33:09 +00:00)
Add support for moving pure S-register to NEON pipeline if desired
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162898 91177308-0d34-0410-b5e6-96231b3b80d8
commit c4a32e6596
parent f52f6b9eca
lib/Target/ARM/ARMBaseInstrInfo.cpp

@@ -3377,7 +3377,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
   // converted.
   if (Subtarget.isCortexA9() && !isPredicated(MI) &&
       (MI->getOpcode() == ARM::VMOVRS ||
-       MI->getOpcode() == ARM::VMOVSR))
+       MI->getOpcode() == ARM::VMOVSR ||
+       MI->getOpcode() == ARM::VMOVS))
     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
 
   // No other instructions can be swizzled, so just determine their domain.
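For context on what that return value means: getExecutionDomain reports the instruction's current domain as the first element and a bitmask of the domains it could legally be rewritten into as the second; with this patch a pure S-register VMOVS on Cortex-A9 now advertises the NEON bit as well. Below is a minimal editorial sketch (not code from this commit) of how a caller such as the execution-domain fixup pass might consume the pair, assuming the ExeVFP/ExeNEON enumerators from this file and a hypothetical preferNEON() heuristic:

  // Sketch only: consuming the (current domain, legal-domain mask) pair.
  std::pair<uint16_t, uint16_t> Domains = TII->getExecutionDomain(MI);
  bool NEONLegal = (Domains.second & (1 << ExeNEON)) != 0; // now true for VMOVS on A9
  if (NEONLegal && preferNEON(MI))        // preferNEON(): hypothetical heuristic
    TII->setExecutionDomain(MI, ExeNEON); // rewrite the instruction into the NEON domain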
@@ -3490,10 +3491,78 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
                       .addReg(DReg, RegState::Undef)
                       .addReg(SrcReg)
                       .addImm(Lane));
 
     // The destination must be marked as set.
     MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
     break;
+  case ARM::VMOVS: {
+    if (Domain != ExeNEON)
+      break;
+
+    // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+
+    for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+      MI->RemoveOperand(i-1);
+
+    unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
+    DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
+    DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
+
+    if (DSrc == DDst) {
+      // Destination can be:
+      //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
+      MI->setDesc(get(ARM::VDUPLN32d));
+      AddDefaultPred(MIB.addReg(DDst, RegState::Define)
+                        .addReg(DDst, RegState::Undef)
+                        .addImm(SrcLane));
+
+      // Neither the source or the destination are naturally represented any
+      // more, so add them in manually.
+      MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
+      MIB.addReg(SrcReg, RegState::Implicit);
+      break;
+    }
+
+    // In general there's no single instruction that can perform an S <-> S
+    // move in NEON space, but a pair of VEXT instructions *can* do the
+    // job. It turns out that the VEXTs needed will only use DSrc once, with
+    // the position based purely on the combination of lane-0 and lane-1
+    // involved. For example
+    //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
+    //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
+    //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
+    //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
+    //
+    // Pattern of the MachineInstrs is:
+    //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
+    MachineInstrBuilder NewMIB;
+    NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                     get(ARM::VEXTd32), DDst);
+    NewMIB.addReg(SrcLane == 1 && DstLane == 1 ? DSrc : DDst, RegState::Undef);
+    NewMIB.addReg(SrcLane == 0 && DstLane == 0 ? DSrc : DDst, RegState::Undef);
+    NewMIB.addImm(1);
+    AddDefaultPred(NewMIB);
+
+    if (SrcLane == DstLane)
+      NewMIB.addReg(SrcReg, RegState::Implicit);
+
+    MI->setDesc(get(ARM::VEXTd32));
+    MIB.addReg(DDst, RegState::Define);
+    MIB.addReg(SrcLane == 1 && DstLane == 0 ? DSrc : DDst, RegState::Undef);
+    MIB.addReg(SrcLane == 0 && DstLane == 1 ? DSrc : DDst, RegState::Undef);
+    MIB.addImm(1);
+    AddDefaultPred(MIB);
+
+    if (SrcLane != DstLane)
+      MIB.addReg(SrcReg, RegState::Implicit);
+
+    // As before, the original destination is no longer represented, add it
+    // implicitly.
+    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+    break;
+  }
   }
 }
 
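The vext.32 table in the comment above can be spot-checked without building LLVM. The program below is an editorial sketch (not part of the commit): it hard-codes the same four ternary operand choices used for the two VEXTd32 instructions in the patch, together with the S-to-D aliasing that getCorrespondingDRegAndLane relies on (s2n sits in lane 0 of dn, s2n+1 in lane 1), and prints the table for the d0/d1 example registers. Its output should reproduce the four example lines in the comment.

  // Editorial sketch only -- reproduces the lane table from the comment above.
  // Assumes standard VFP/NEON aliasing: s(2n) -> d(n) lane 0, s(2n+1) -> d(n) lane 1.
  #include <cstdio>

  int main() {
    const char *DDst = "d0";   // destination D register (holds s0/s1)
    const char *DSrc = "d1";   // source D register (holds s2/s3)

    for (unsigned DstLane = 0; DstLane < 2; ++DstLane) {
      for (unsigned SrcLane = 0; SrcLane < 2; ++SrcLane) {
        // Operand selection copied from the two VEXTd32 builders in the patch.
        const char *FirstA  = (SrcLane == 1 && DstLane == 1) ? DSrc : DDst;
        const char *FirstB  = (SrcLane == 0 && DstLane == 0) ? DSrc : DDst;
        const char *SecondA = (SrcLane == 1 && DstLane == 0) ? DSrc : DDst;
        const char *SecondB = (SrcLane == 0 && DstLane == 1) ? DSrc : DDst;

        // S-register names implied by the lanes: destination in d0, source in d1.
        unsigned SDst = DstLane;      // s0 or s1
        unsigned SSrc = 2 + SrcLane;  // s2 or s3

        printf("vmov s%u, s%u -> vext.32 %s, %s, %s, #1  vext.32 %s, %s, %s, #1\n",
               SDst, SSrc, DDst, FirstA, FirstB, DDst, SecondA, SecondB);
      }
    }
    return 0;
  }

Built with any C++ compiler (the file name is arbitrary), e.g. c++ lane_table.cpp && ./a.out.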
test/CodeGen/ARM/domain-conv-vmovs.ll (new file, 64 lines)

@@ -0,0 +1,64 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
+
+define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane0to0:
+  %vec = insertelement <2 x float> %in, float %arg, i32 0
+  %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d1, d0, #1
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+  ret <2 x float> %res
+}
+
+define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane0to1:
+  %vec = insertelement <2 x float> %in, float %arg, i32 1
+  %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vext.32 d1, d1, d0, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+  ret <2 x float> %res
+}
+
+define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane1to0:
+  %vec = insertelement <2 x float> %in, float %arg, i32 0
+  %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vext.32 d1, d0, d1, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+  ret <2 x float> %res
+}
+
+define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane1to1:
+  %vec = insertelement <2 x float> %in, float %arg, i32 1
+  %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d0, d1, #1
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+  ret <2 x float> %res
+}
+
+
+define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
+; CHECK: test_vmovs_via_vdup:
+
+; Do an operation (which will end up NEON because of +neonfp) to convince the
+; execution-domain pass that NEON is a good thing to use.
+  %res = fadd float %ret, %ret
+; It makes sense for LLVM to do the addition in d0 here, because it's going
+; to be returned. This means it will want a "vmov s0, s1":
+; CHECK: vdup.32 d0, d0[1]
+
+  ret float %res
+}
+