From 3c8ad92455ff06c8e69085702ef1f13944eab4dd Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 17 Aug 2012 11:32:52 +0000 Subject: [PATCH] Implement NEON domain switching for scalar <-> S-register vmovs on ARM git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162094 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 112 +++++++++++++++--- .../ARM/2011-11-29-128bitArithmetics.ll | 64 +++++----- 2 files changed, 129 insertions(+), 47 deletions(-) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0261bb3ed7e..1cc5a17cb02 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3354,11 +3354,18 @@ enum ARMExeDomain { // std::pair ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { - // VMOVD is a VFP instruction, but can be changed to NEON if it isn't - // predicated. + // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON + // if they are not predicated. if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<getOpcode() == ARM::VMOVRS || + MI->getOpcode() == ARM::VMOVSR)) + return std::make_pair(ExeVFP, (1<getDesc().TSFlags & ARMII::DomainMask; @@ -3378,22 +3385,97 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { - // We only know how to change VMOVD into VORR. - assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); - if (Domain != ExeNEON) - return; + unsigned DstReg, SrcReg, DReg; + unsigned Lane; + MachineInstrBuilder MIB(MI); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + bool isKill; + switch (MI->getOpcode()) { + default: + llvm_unreachable("cannot handle opcode!"); + break; + case ARM::VMOVD: + if (Domain != ExeNEON) + break; - // Zap the predicate operands. - assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); + // Zap the predicate operands. + assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + MI->RemoveOperand(3); + MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Change to a VORRd which requires two identical use operands. + MI->setDesc(get(ARM::VORRd)); + + // Add the extra source operand and new predicates. + // This will go before any implicit ops. + AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + break; + case ARM::VMOVRS: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + + MI->setDesc(get(ARM::VGETLNi32)); + MIB.addReg(DReg); + MIB.addImm(Lane); + + MIB->getOperand(1).setIsUndef(); + MIB.addReg(SrcReg, RegState::Implicit); + + AddDefaultPred(MIB); + break; + case ARM::VMOVSR: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + isKill = MI->getOperand(0).isKill(); + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + MI->RemoveOperand(0); + + MI->setDesc(get(ARM::VSETLNi32)); + MIB.addReg(DReg); + MIB.addReg(DReg); + MIB.addReg(SrcReg); + MIB.addImm(Lane); + + MIB->getOperand(1).setIsUndef(); + + if (isKill) + MIB->addRegisterKilled(DstReg, TRI, true); + MIB->addRegisterDefined(DstReg, TRI); + + AddDefaultPred(MIB); + break; + } - // Add the extra source operand and new predicates. - // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); } bool ARMBaseInstrInfo::hasNOP() const { diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 6fbae199aae..89c01d58c39 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -33,16 +33,16 @@ define void @test_cos(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}cosf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}cosf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}cosf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}cosf ; CHECK: vstmia {{.*}} @@ -64,16 +64,16 @@ define void @test_exp(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}expf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}expf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}expf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}expf ; CHECK: vstmia {{.*}} @@ -95,16 +95,16 @@ define void @test_exp2(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}exp2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}exp2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}exp2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}exp2f ; CHECK: vstmia {{.*}} @@ -126,16 +126,16 @@ define void @test_log10(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log10f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log10f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log10f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log10f ; CHECK: vstmia {{.*}} @@ -157,16 +157,16 @@ define void @test_log(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}logf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}logf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}logf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}logf ; CHECK: vstmia {{.*}} @@ -188,16 +188,16 @@ define void @test_log2(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}log2f ; CHECK: vstmia {{.*}} @@ -220,16 +220,16 @@ define void @test_pow(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}powf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}powf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}powf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}powf ; CHECK: vstmia {{.*}} @@ -277,16 +277,16 @@ define void @test_sin(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}sinf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}sinf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}sinf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{[mov|vmov.32]}} r0, ; CHECK: bl {{.*}}sinf ; CHECK: vstmia {{.*}}