From 2f3691eb61b7d77e21895fcacdb0fa6553ec99ec Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 28 Apr 2014 16:21:50 +0000 Subject: [PATCH] [ARM64] Fix an issue where we were always assuming a copy was coming from a D subregister. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207423 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp | 6 ++-- test/CodeGen/ARM64/AdvSIMD-Scalar.ll | 31 ++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp index 851572485dd..87eec8f6160 100644 --- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp +++ b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp @@ -90,7 +90,7 @@ public: virtual bool runOnMachineFunction(MachineFunction &F); const char *getPassName() const { - return "AdvSIMD scalar operation optimization"; + return "AdvSIMD Scalar Operation Optimization"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -117,7 +117,7 @@ static bool isFPR64(unsigned Reg, unsigned SubReg, SubReg == 0) || (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) && SubReg == ARM64::dsub); - // Physical register references just check the regist class directly. + // Physical register references just check the register class directly. return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) || (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub); } @@ -148,7 +148,7 @@ static unsigned getSrcFromCopy(const MachineInstr *MI, MRI) && isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI)) { - SubReg = ARM64::dsub; + SubReg = MI->getOperand(1).getSubReg(); return MI->getOperand(1).getReg(); } } diff --git a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll index 6397ac54d3e..3e75eed4cd5 100644 --- a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll +++ b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll @@ -1,10 +1,15 @@ ; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -; +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=generic -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC + define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: bar: ; CHECK: add.2d v[[REG:[0-9]+]], v0, v1 ; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1 ; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1 +; GENERIC-LABEL: bar: +; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d +; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1 +; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1 %add = add <2 x i64> %a, %b %vgetq_lane = extractelement <2 x i64> %add, i32 0 %vgetq_lane2 = extractelement <2 x i64> %b, i32 0 @@ -19,6 +24,9 @@ define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: subdd_su64: ; CHECK: sub d0, d1, d0 ; CHECK-NEXT: ret +; GENERIC-LABEL: subdd_su64: +; GENERIC: sub d0, d1, d0 +; GENERIC-NEXT: ret %vecext = extractelement <2 x i64> %a, i32 0 %vecext1 = extractelement <2 x i64> %b, i32 0 %sub.i = sub nsw i64 %vecext1, %vecext @@ -30,9 +38,30 @@ define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: vaddd_su64: ; CHECK: add d0, d1, d0 ; CHECK-NEXT: ret +; GENERIC-LABEL: vaddd_su64: +; GENERIC: add d0, d1, d0 +; GENERIC-NEXT: ret %vecext = extractelement <2 x i64> %a, i32 0 %vecext1 = extractelement <2 x i64> %b, i32 0 %add.i = add nsw i64 %vecext1, %vecext %retval = bitcast i64 %add.i to double ret double %retval } + +; sub MI doesn't access dsub register. +define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: add_sub_su64: +; CHECK: add d0, d1, d0 +; CHECK: sub d0, {{d[0-9]+}}, d0 +; CHECK-NEXT: ret +; GENERIC-LABEL: add_sub_su64: +; GENERIC: add d0, d1, d0 +; GENERIC: sub d0, {{d[0-9]+}}, d0 +; GENERIC-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %add.i = add i64 %vecext1, %vecext + %sub.i = sub i64 0, %add.i + %retval = bitcast i64 %sub.i to double + ret double %retval +}