Implement NEON domain switching for scalar <-> S-register vmovs on ARM

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162094 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2012-08-17 11:32:52 +00:00
parent 734fd27647
commit 3c8ad92455
2 changed files with 129 additions and 47 deletions

View File

@ -3354,11 +3354,18 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
// VMOVD is a VFP instruction, but can be changed to NEON if it isn't
// predicated.
// VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
// if they are not predicated.
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// Cortex-A9 is particularly picky about mixing the two and wants these
// converted.
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@ -3378,22 +3385,97 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
// Rewrite MI in place so that it executes in the requested domain.
//
// Only a switch to ExeNEON changes anything; for any other Domain the
// instruction is left untouched. The handled opcodes are rewritten as:
//   VMOVD  -> VORRd      (the source operand is duplicated)
//   VMOVRS -> VGETLNi32  (reads lane 0/1 of the D-register containing the
//                         S-register source)
//   VMOVSR -> VSETLNi32  (writes lane 0/1 of the D-register containing the
//                         S-register destination)
// Any other opcode reaches llvm_unreachable. MI must not be predicated.
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
  unsigned DstReg, SrcReg, DReg;
  unsigned Lane;
  MachineInstrBuilder MIB(MI);
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  bool isKill;

  switch (MI->getOpcode()) {
    default:
      llvm_unreachable("cannot handle opcode!");
      break;

    case ARM::VMOVD:
      if (Domain != ExeNEON)
        break;

      // Zap the predicate operands (highest index first so the remaining
      // indices stay valid).
      assert(!isPredicated(MI) && "Cannot predicate a VORRd");
      MI->RemoveOperand(3);
      MI->RemoveOperand(2);

      // Change to a VORRd which requires two identical use operands.
      MI->setDesc(get(ARM::VORRd));

      // Add the extra source operand and new predicates.
      // This will go before any implicit ops.
      AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
      break;

    case ARM::VMOVRS:
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");

      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      // Find the D-register that has SrcReg as its ssub_0 (lane 0) or
      // ssub_1 (lane 1) sub-register.
      DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
      Lane = 0;
      if (DReg == ARM::NoRegister) {
        DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
        Lane = 1;
        assert(DReg && "S-register with no D super-register?");
      }

      // Drop the predicate operands and the source, keeping only the
      // destination (operand 0).
      MI->RemoveOperand(3);
      MI->RemoveOperand(2);
      MI->RemoveOperand(1);

      // Rebuild as: DstReg = VGETLNi32 DReg<undef>, Lane, <default pred>,
      // with SrcReg kept as an implicit use.
      MI->setDesc(get(ARM::VGETLNi32));
      MIB.addReg(DReg);
      MIB.addImm(Lane);
      // NOTE(review): the D-register use is unconditionally flagged undef,
      // presumably because only the SrcReg half is known to be live — confirm.
      MIB->getOperand(1).setIsUndef();
      MIB.addReg(SrcReg, RegState::Implicit);
      AddDefaultPred(MIB);
      break;

    case ARM::VMOVSR:
      if (Domain != ExeNEON)
        break;
      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");

      DstReg = MI->getOperand(0).getReg();
      SrcReg = MI->getOperand(1).getReg();

      // Find the D-register that has DstReg as its ssub_0 (lane 0) or
      // ssub_1 (lane 1) sub-register.
      DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
      Lane = 0;
      if (DReg == ARM::NoRegister) {
        DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
        Lane = 1;
        assert(DReg && "S-register with no D super-register?");
      }

      // NOTE(review): operand 0 is the operand DstReg was read from; querying
      // its kill flag looks suspicious — confirm which operand was intended.
      isKill = MI->getOperand(0).isKill();

      // Strip every explicit operand; the instruction is rebuilt from scratch.
      MI->RemoveOperand(3);
      MI->RemoveOperand(2);
      MI->RemoveOperand(1);
      MI->RemoveOperand(0);

      // Rebuild as: DReg = VSETLNi32 DReg<undef>, SrcReg, Lane, <default pred>.
      // NOTE(review): the first DReg operand is added without explicit
      // register-state flags — confirm it ends up as the definition.
      MI->setDesc(get(ARM::VSETLNi32));
      MIB.addReg(DReg);
      MIB.addReg(DReg);
      MIB.addReg(SrcReg);
      MIB.addImm(Lane);
      MIB->getOperand(1).setIsUndef();

      // Record the original S-register destination as killed (if it was) and
      // as defined by the rewritten instruction.
      if (isKill)
        MIB->addRegisterKilled(DstReg, TRI, true);
      MIB->addRegisterDefined(DstReg, TRI);

      AddDefaultPred(MIB);
      break;
  }
}
bool ARMBaseInstrInfo::hasNOP() const {

View File

@ -33,16 +33,16 @@ define void @test_cos(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}cosf
; CHECK: vstmia {{.*}}
@ -64,16 +64,16 @@ define void @test_exp(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}expf
; CHECK: vstmia {{.*}}
@ -95,16 +95,16 @@ define void @test_exp2(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}exp2f
; CHECK: vstmia {{.*}}
@ -126,16 +126,16 @@ define void @test_log10(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log10f
; CHECK: vstmia {{.*}}
@ -157,16 +157,16 @@ define void @test_log(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}logf
; CHECK: vstmia {{.*}}
@ -188,16 +188,16 @@ define void @test_log2(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}log2f
; CHECK: vstmia {{.*}}
@ -220,16 +220,16 @@ define void @test_pow(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}powf
; CHECK: vstmia {{.*}}
@ -277,16 +277,16 @@ define void @test_sin(<4 x float>* %X) nounwind {
; CHECK: movt [[reg0]], :upper16:{{.*}}
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
; CHECK: {{[mov|vmov.32]}} r0,
; CHECK: bl {{.*}}sinf
; CHECK: vstmia {{.*}}