mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-21 03:32:21 +00:00
Implement NEON domain switching for scalar <-> S-register vmovs on ARM
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162094 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
734fd27647
commit
3c8ad92455
@ -3354,11 +3354,18 @@ enum ARMExeDomain {
|
||||
//
|
||||
std::pair<uint16_t, uint16_t>
|
||||
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
|
||||
// VMOVD is a VFP instruction, but can be changed to NEON if it isn't
|
||||
// predicated.
|
||||
// VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
|
||||
// if they are not predicated.
|
||||
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
|
||||
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
|
||||
|
||||
// Cortex-A9 is particularly picky about mixing the two and wants these
|
||||
// converted.
|
||||
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
|
||||
(MI->getOpcode() == ARM::VMOVRS ||
|
||||
MI->getOpcode() == ARM::VMOVSR))
|
||||
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
|
||||
|
||||
// No other instructions can be swizzled, so just determine their domain.
|
||||
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
|
||||
|
||||
@ -3378,22 +3385,97 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
|
||||
|
||||
void
|
||||
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
|
||||
// We only know how to change VMOVD into VORR.
|
||||
assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
|
||||
if (Domain != ExeNEON)
|
||||
return;
|
||||
unsigned DstReg, SrcReg, DReg;
|
||||
unsigned Lane;
|
||||
MachineInstrBuilder MIB(MI);
|
||||
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
||||
bool isKill;
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("cannot handle opcode!");
|
||||
break;
|
||||
case ARM::VMOVD:
|
||||
if (Domain != ExeNEON)
|
||||
break;
|
||||
|
||||
// Zap the predicate operands.
|
||||
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
|
||||
MI->RemoveOperand(3);
|
||||
MI->RemoveOperand(2);
|
||||
// Zap the predicate operands.
|
||||
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
|
||||
MI->RemoveOperand(3);
|
||||
MI->RemoveOperand(2);
|
||||
|
||||
// Change to a VORRd which requires two identical use operands.
|
||||
MI->setDesc(get(ARM::VORRd));
|
||||
// Change to a VORRd which requires two identical use operands.
|
||||
MI->setDesc(get(ARM::VORRd));
|
||||
|
||||
// Add the extra source operand and new predicates.
|
||||
// This will go before any implicit ops.
|
||||
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
|
||||
break;
|
||||
case ARM::VMOVRS:
|
||||
if (Domain != ExeNEON)
|
||||
break;
|
||||
assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
|
||||
|
||||
DstReg = MI->getOperand(0).getReg();
|
||||
SrcReg = MI->getOperand(1).getReg();
|
||||
|
||||
DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
|
||||
Lane = 0;
|
||||
if (DReg == ARM::NoRegister) {
|
||||
DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
|
||||
Lane = 1;
|
||||
assert(DReg && "S-register with no D super-register?");
|
||||
}
|
||||
|
||||
MI->RemoveOperand(3);
|
||||
MI->RemoveOperand(2);
|
||||
MI->RemoveOperand(1);
|
||||
|
||||
MI->setDesc(get(ARM::VGETLNi32));
|
||||
MIB.addReg(DReg);
|
||||
MIB.addImm(Lane);
|
||||
|
||||
MIB->getOperand(1).setIsUndef();
|
||||
MIB.addReg(SrcReg, RegState::Implicit);
|
||||
|
||||
AddDefaultPred(MIB);
|
||||
break;
|
||||
case ARM::VMOVSR:
|
||||
if (Domain != ExeNEON)
|
||||
break;
|
||||
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
|
||||
|
||||
DstReg = MI->getOperand(0).getReg();
|
||||
SrcReg = MI->getOperand(1).getReg();
|
||||
DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
|
||||
Lane = 0;
|
||||
if (DReg == ARM::NoRegister) {
|
||||
DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
|
||||
Lane = 1;
|
||||
assert(DReg && "S-register with no D super-register?");
|
||||
}
|
||||
isKill = MI->getOperand(0).isKill();
|
||||
|
||||
MI->RemoveOperand(3);
|
||||
MI->RemoveOperand(2);
|
||||
MI->RemoveOperand(1);
|
||||
MI->RemoveOperand(0);
|
||||
|
||||
MI->setDesc(get(ARM::VSETLNi32));
|
||||
MIB.addReg(DReg);
|
||||
MIB.addReg(DReg);
|
||||
MIB.addReg(SrcReg);
|
||||
MIB.addImm(Lane);
|
||||
|
||||
MIB->getOperand(1).setIsUndef();
|
||||
|
||||
if (isKill)
|
||||
MIB->addRegisterKilled(DstReg, TRI, true);
|
||||
MIB->addRegisterDefined(DstReg, TRI);
|
||||
|
||||
AddDefaultPred(MIB);
|
||||
break;
|
||||
}
|
||||
|
||||
// Add the extra source operand and new predicates.
|
||||
// This will go before any implicit ops.
|
||||
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
|
||||
}
|
||||
|
||||
bool ARMBaseInstrInfo::hasNOP() const {
|
||||
|
@ -33,16 +33,16 @@ define void @test_cos(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}cosf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}cosf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}cosf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}cosf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
@ -64,16 +64,16 @@ define void @test_exp(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}expf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}expf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}expf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}expf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
@ -95,16 +95,16 @@ define void @test_exp2(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}exp2f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}exp2f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}exp2f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}exp2f
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
@ -126,16 +126,16 @@ define void @test_log10(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log10f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log10f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log10f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log10f
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
@ -157,16 +157,16 @@ define void @test_log(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}logf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}logf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}logf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}logf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
@ -188,16 +188,16 @@ define void @test_log2(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log2f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log2f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log2f
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}log2f
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
@ -220,16 +220,16 @@ define void @test_pow(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}powf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}powf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}powf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}powf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
@ -277,16 +277,16 @@ define void @test_sin(<4 x float>* %X) nounwind {
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}sinf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}sinf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}sinf
|
||||
|
||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
||||
; CHECK: {{[mov|vmov.32]}} r0,
|
||||
; CHECK: bl {{.*}}sinf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user