From 57f6b2778b59e3ff2d9f9664eacb432a0ddd0887 Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Tue, 26 Nov 2013 02:33:42 +0000 Subject: [PATCH] [AArch64]Implement 128 bit register copy with NEON. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195713 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 34 +++++++++++++------------ test/CodeGen/AArch64/neon-perm.ll | 3 +++ 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 706d0b05e06..180110a84dd 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -114,23 +114,25 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (AArch64::FPR128RegClass.contains(DestReg)) { assert(AArch64::FPR128RegClass.contains(SrcReg)); - // FIXME: there's no good way to do this, at least without NEON: - // + There's no single move instruction for q-registers - // + We can't create a spill slot and use normal STR/LDR because stack - // allocation has already happened - // + We can't go via X-registers with FMOV because register allocation has - // already happened. - // This may not be efficient, but at least it works. - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) - .addReg(SrcReg) - .addReg(AArch64::XSP) - .addImm(0x1ff & -16); + // If NEON is enable, we use ORR to implement this copy. + // If NEON isn't available, emit STR and LDR to handle this. + if(getSubTarget().hasNEON()) { + BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg) + .addReg(SrcReg) + .addReg(SrcReg); + return; + } else { + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) + .addReg(SrcReg) + .addReg(AArch64::XSP) + .addImm(0x1ff & -16); - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) - .addReg(AArch64::XSP, RegState::Define) - .addReg(AArch64::XSP) - .addImm(16); - return; + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) + .addReg(AArch64::XSP, RegState::Define) + .addReg(AArch64::XSP) + .addImm(16); + return; + } } else { llvm_unreachable("Unknown register class in copyPhysReg"); } diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll index 4db4771cf13..6ad93d01f9f 100644 --- a/test/CodeGen/AArch64/neon-perm.ll +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -238,6 +238,7 @@ entry: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK: test_vuzp2q_s64: ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -294,6 +295,7 @@ entry: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK: test_vuzp2q_u64: ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i @@ -318,6 +320,7 @@ entry: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK: test_vuzp2q_f64: ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i