mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-12 17:25:49 +00:00
Implement NEON domain switching for scalar <-> S-register vmovs on ARM
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162094 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -3354,11 +3354,18 @@ enum ARMExeDomain {
|
|||||||
//
|
//
|
||||||
std::pair<uint16_t, uint16_t>
|
std::pair<uint16_t, uint16_t>
|
||||||
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
|
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
|
||||||
// VMOVD is a VFP instruction, but can be changed to NEON if it isn't
|
// VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
|
||||||
// predicated.
|
// if they are not predicated.
|
||||||
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
|
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
|
||||||
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
|
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
|
||||||
|
|
||||||
|
// Cortex-A9 is particularly picky about mixing the two and wants these
|
||||||
|
// converted.
|
||||||
|
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
|
||||||
|
(MI->getOpcode() == ARM::VMOVRS ||
|
||||||
|
MI->getOpcode() == ARM::VMOVSR))
|
||||||
|
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
|
||||||
|
|
||||||
// No other instructions can be swizzled, so just determine their domain.
|
// No other instructions can be swizzled, so just determine their domain.
|
||||||
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
|
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
|
||||||
|
|
||||||
@@ -3378,22 +3385,97 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
|
|||||||
|
|
||||||
void
|
void
|
||||||
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
|
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
|
||||||
// We only know how to change VMOVD into VORR.
|
unsigned DstReg, SrcReg, DReg;
|
||||||
assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
|
unsigned Lane;
|
||||||
if (Domain != ExeNEON)
|
MachineInstrBuilder MIB(MI);
|
||||||
return;
|
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
||||||
|
bool isKill;
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
default:
|
||||||
|
llvm_unreachable("cannot handle opcode!");
|
||||||
|
break;
|
||||||
|
case ARM::VMOVD:
|
||||||
|
if (Domain != ExeNEON)
|
||||||
|
break;
|
||||||
|
|
||||||
// Zap the predicate operands.
|
// Zap the predicate operands.
|
||||||
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
|
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
|
||||||
MI->RemoveOperand(3);
|
MI->RemoveOperand(3);
|
||||||
MI->RemoveOperand(2);
|
MI->RemoveOperand(2);
|
||||||
|
|
||||||
// Change to a VORRd which requires two identical use operands.
|
// Change to a VORRd which requires two identical use operands.
|
||||||
MI->setDesc(get(ARM::VORRd));
|
MI->setDesc(get(ARM::VORRd));
|
||||||
|
|
||||||
|
// Add the extra source operand and new predicates.
|
||||||
|
// This will go before any implicit ops.
|
||||||
|
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
|
||||||
|
break;
|
||||||
|
case ARM::VMOVRS:
|
||||||
|
if (Domain != ExeNEON)
|
||||||
|
break;
|
||||||
|
assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
|
||||||
|
|
||||||
|
DstReg = MI->getOperand(0).getReg();
|
||||||
|
SrcReg = MI->getOperand(1).getReg();
|
||||||
|
|
||||||
|
DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
|
||||||
|
Lane = 0;
|
||||||
|
if (DReg == ARM::NoRegister) {
|
||||||
|
DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
|
||||||
|
Lane = 1;
|
||||||
|
assert(DReg && "S-register with no D super-register?");
|
||||||
|
}
|
||||||
|
|
||||||
|
MI->RemoveOperand(3);
|
||||||
|
MI->RemoveOperand(2);
|
||||||
|
MI->RemoveOperand(1);
|
||||||
|
|
||||||
|
MI->setDesc(get(ARM::VGETLNi32));
|
||||||
|
MIB.addReg(DReg);
|
||||||
|
MIB.addImm(Lane);
|
||||||
|
|
||||||
|
MIB->getOperand(1).setIsUndef();
|
||||||
|
MIB.addReg(SrcReg, RegState::Implicit);
|
||||||
|
|
||||||
|
AddDefaultPred(MIB);
|
||||||
|
break;
|
||||||
|
case ARM::VMOVSR:
|
||||||
|
if (Domain != ExeNEON)
|
||||||
|
break;
|
||||||
|
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
|
||||||
|
|
||||||
|
DstReg = MI->getOperand(0).getReg();
|
||||||
|
SrcReg = MI->getOperand(1).getReg();
|
||||||
|
DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
|
||||||
|
Lane = 0;
|
||||||
|
if (DReg == ARM::NoRegister) {
|
||||||
|
DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
|
||||||
|
Lane = 1;
|
||||||
|
assert(DReg && "S-register with no D super-register?");
|
||||||
|
}
|
||||||
|
isKill = MI->getOperand(0).isKill();
|
||||||
|
|
||||||
|
MI->RemoveOperand(3);
|
||||||
|
MI->RemoveOperand(2);
|
||||||
|
MI->RemoveOperand(1);
|
||||||
|
MI->RemoveOperand(0);
|
||||||
|
|
||||||
|
MI->setDesc(get(ARM::VSETLNi32));
|
||||||
|
MIB.addReg(DReg);
|
||||||
|
MIB.addReg(DReg);
|
||||||
|
MIB.addReg(SrcReg);
|
||||||
|
MIB.addImm(Lane);
|
||||||
|
|
||||||
|
MIB->getOperand(1).setIsUndef();
|
||||||
|
|
||||||
|
if (isKill)
|
||||||
|
MIB->addRegisterKilled(DstReg, TRI, true);
|
||||||
|
MIB->addRegisterDefined(DstReg, TRI);
|
||||||
|
|
||||||
|
AddDefaultPred(MIB);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Add the extra source operand and new predicates.
|
|
||||||
// This will go before any implicit ops.
|
|
||||||
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARMBaseInstrInfo::hasNOP() const {
|
bool ARMBaseInstrInfo::hasNOP() const {
|
||||||
|
@@ -33,16 +33,16 @@ define void @test_cos(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}cosf
|
; CHECK: bl {{.*}}cosf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}cosf
|
; CHECK: bl {{.*}}cosf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}cosf
|
; CHECK: bl {{.*}}cosf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}cosf
|
; CHECK: bl {{.*}}cosf
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
@@ -64,16 +64,16 @@ define void @test_exp(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}expf
|
; CHECK: bl {{.*}}expf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}expf
|
; CHECK: bl {{.*}}expf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}expf
|
; CHECK: bl {{.*}}expf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}expf
|
; CHECK: bl {{.*}}expf
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
@@ -95,16 +95,16 @@ define void @test_exp2(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}exp2f
|
; CHECK: bl {{.*}}exp2f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}exp2f
|
; CHECK: bl {{.*}}exp2f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}exp2f
|
; CHECK: bl {{.*}}exp2f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}exp2f
|
; CHECK: bl {{.*}}exp2f
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
@@ -126,16 +126,16 @@ define void @test_log10(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log10f
|
; CHECK: bl {{.*}}log10f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log10f
|
; CHECK: bl {{.*}}log10f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log10f
|
; CHECK: bl {{.*}}log10f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log10f
|
; CHECK: bl {{.*}}log10f
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
@@ -157,16 +157,16 @@ define void @test_log(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}logf
|
; CHECK: bl {{.*}}logf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}logf
|
; CHECK: bl {{.*}}logf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}logf
|
; CHECK: bl {{.*}}logf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}logf
|
; CHECK: bl {{.*}}logf
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
@@ -188,16 +188,16 @@ define void @test_log2(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log2f
|
; CHECK: bl {{.*}}log2f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log2f
|
; CHECK: bl {{.*}}log2f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log2f
|
; CHECK: bl {{.*}}log2f
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}log2f
|
; CHECK: bl {{.*}}log2f
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
@@ -220,16 +220,16 @@ define void @test_pow(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}powf
|
; CHECK: bl {{.*}}powf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}powf
|
; CHECK: bl {{.*}}powf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}powf
|
; CHECK: bl {{.*}}powf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}powf
|
; CHECK: bl {{.*}}powf
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
@@ -277,16 +277,16 @@ define void @test_sin(<4 x float>* %X) nounwind {
|
|||||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}sinf
|
; CHECK: bl {{.*}}sinf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}sinf
|
; CHECK: bl {{.*}}sinf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}sinf
|
; CHECK: bl {{.*}}sinf
|
||||||
|
|
||||||
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
|
; CHECK: {{[mov|vmov.32]}} r0,
|
||||||
; CHECK: bl {{.*}}sinf
|
; CHECK: bl {{.*}}sinf
|
||||||
|
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
|
Reference in New Issue
Block a user