From b1fadec968a2c9e323a9fff054299c4a6870eb83 Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Fri, 24 Jan 2014 07:53:04 +0000 Subject: [PATCH] [AArch64 NEON] Fix a bug in implementing register copy bwtween FPR16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199978 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 4 ++-- test/CodeGen/AArch64/neon-copy.ll | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 34312e22844..9bf9d1918c0 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -135,9 +135,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) { // The copy of two FPR16 registers is implemented by the copy of two FPR32 const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned Dst = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16, + unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16, &AArch64::FPR32RegClass); - unsigned Src = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16, + unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16, &AArch64::FPR32RegClass); BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) .addReg(Src); diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index 9960e15513e..2a8d97414b8 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -1262,4 +1262,15 @@ entry: %vecext1 = extractelement <1 x i64> %y, i32 0 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 ret <2 x i64> %vecinit2 -} \ No newline at end of file +} + +declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) { +; CHECK-LABEL: test_copy_FPR16_FPR16: +; CHECK: usqadd h1, h0 +; CHECK-NEXT: fmov s0, s1 +entry: + %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a) + ret <1 x i16> %vsqadd2.i +}