From f2b3638c3d47faef91fa2f45582d7c7501045094 Mon Sep 17 00:00:00 2001
From: Karthik Bhat <kv.bhat@samsung.com>
Date: Wed, 7 Jan 2015 06:34:34 +0000
Subject: [PATCH] =?UTF-8?q?Revert=20r225165=20and=20r225169=20Even?=
 =?UTF-8?q?=20though=20gcc=20produces=20similar=20instructions=20as=20Owen?=
 =?UTF-8?q?=20pointed=20out=20the=20two=20patterns=20aren=E2=80=99t?=
 =?UTF-8?q?=20equivalent=20in=20the=20case=20where=20the=20original?=
 =?UTF-8?q?=20subtraction=20could=20have=20caused=20an=20overflow.?=
 =?UTF-8?q?=20Reverting=20the=20same.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225341 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64InstrInfo.td       |  39 -----
 test/CodeGen/AArch64/arm64-neon-simd-vabs.ll | 170 -------------------
 2 files changed, 209 deletions(-)
 delete mode 100644 test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 46292f86e34..e0fb90a9f62 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2760,40 +2760,6 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                   BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
 defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
 
-// SABD Vd.<T>, Vn.<T>, Vm.<T> Subtracts the elements of Vm from the corresponding
-// elements of Vn, and places the absolute values of the results in the elements of Vd.
-def : Pat<(xor (v8i8 (AArch64vashr (v8i8(sub V64:$Rn, V64:$Rm)), (i32 7))),
-               (v8i8 (add (v8i8(sub V64:$Rn, V64:$Rm)),
-               (AArch64vashr (v8i8(sub V64:$Rn, V64:$Rm)), (i32 7))))),
-          (SABDv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(xor (v4i16 (AArch64vashr (v4i16(sub V64:$Rn, V64:$Rm)), (i32 15))),
-               (v4i16 (add (v4i16(sub V64:$Rn, V64:$Rm)),
-               (AArch64vashr (v4i16(sub V64:$Rn, V64:$Rm)), (i32 15))))),
-          (SABDv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(xor (v2i32 (AArch64vashr (v2i32(sub V64:$Rn, V64:$Rm)), (i32 31))),
-               (v2i32 (add (v2i32(sub V64:$Rn, V64:$Rm)),
-               (AArch64vashr (v2i32(sub V64:$Rn, V64:$Rm)), (i32 31))))),
-          (SABDv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(xor (v16i8 (AArch64vashr (v16i8(sub V128:$Rn, V128:$Rm)), (i32 7))),
-               (v16i8 (add (v16i8(sub V128:$Rn, V128:$Rm)),
-               (AArch64vashr (v16i8(sub V128:$Rn, V128:$Rm)), (i32 7))))),
-          (SABDv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(xor (v8i16 (AArch64vashr (v8i16(sub V128:$Rn, V128:$Rm)), (i32 15))),
-               (v8i16 (add (v8i16(sub V128:$Rn, V128:$Rm)),
-               (AArch64vashr (v8i16(sub V128:$Rn, V128:$Rm)), (i32 15))))),
-          (SABDv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(xor (v4i32 (AArch64vashr (v4i32(sub V128:$Rn, V128:$Rm)), (i32 31))),
-               (v4i32 (add (v4i32(sub V128:$Rn, V128:$Rm)),
-               (AArch64vashr (v4i32(sub V128:$Rn, V128:$Rm)), (i32 31))))),
-          (SABDv4i32 V128:$Rn, V128:$Rm)>;
-
-def : Pat<(v2f32 (fabs (fsub V64:$Rn, V64:$Rm))),
-          (FABDv2f32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4f32 (fabs (fsub V128:$Rn, V128:$Rm))),
-          (FABDv4f32 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2f64 (fabs (fsub V128:$Rn, V128:$Rm))),
-          (FABDv2f64 V128:$Rn, V128:$Rm)>;
-
 def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
           (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
 def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
@@ -3083,11 +3049,6 @@ defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
 defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;
 
-def : Pat<(f32 (fabs (fsub FPR32:$Rn, FPR32:$Rm))),
-          (FABD32 FPR32:$Rn, FPR32:$Rm)>;
-def : Pat<(f64 (fabs (fsub FPR64:$Rn, FPR64:$Rm))),
-          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
-
 def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
 
 def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
diff --git a/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll b/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
deleted file mode 100644
index dea65deee02..00000000000
--- a/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
+++ /dev/null
@@ -1,170 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64--linux-gnu"
-
-; CHECK: testv4i32
-; CHECK: sabd	v0.4s, v0.4s, v1.4s
-define void @testv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
-  %1 = bitcast i32* %b to <4 x i32>*
-  %2 = load <4 x i32>* %1, align 4
-  %3 = bitcast i32* %c to <4 x i32>*
-  %4 = load <4 x i32>* %3, align 4
-  %5 = sub nsw <4 x i32> %2, %4
-  %6 = icmp sgt <4 x i32> %5, <i32 -1, i32 -1, i32 -1, i32 -1>
-  %7 = sub <4 x i32> zeroinitializer, %5
-  %8 = select <4 x i1> %6, <4 x i32> %5, <4 x i32> %7
-  %9 = bitcast i32* %a to <4 x i32>*
-  store <4 x i32> %8, <4 x i32>* %9, align 4
-  ret void
-}
-
-; CHECK: testv2i32
-; CHECK: sabd	v0.2s, v0.2s, v1.2s
-define void @testv2i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
-  %1 = bitcast i32* %b to <2 x i32>*
-  %2 = load <2 x i32>* %1, align 4
-  %3 = bitcast i32* %c to <2 x i32>*
-  %4 = load <2 x i32>* %3, align 4
-  %5 = sub nsw <2 x i32> %2, %4
-  %6 = icmp sgt <2 x i32> %5, <i32 -1, i32 -1>
-  %7 = sub <2 x i32> zeroinitializer, %5
-  %8 = select <2 x i1> %6, <2 x i32> %5, <2 x i32> %7
-  %9 = bitcast i32* %a to <2 x i32>*
-  store <2 x i32> %8, <2 x i32>* %9, align 4
-  ret void
-}
-
-; CHECK: testv8i16
-; CHECK: sabd	v0.8h, v0.8h, v1.8h
-define void @testv8i16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c){
-  %1 = bitcast i16* %b to <8 x i16>*
-  %2 = load <8 x i16>* %1, align 4
-  %3 = bitcast i16* %c to <8 x i16>*
-  %4 = load <8 x i16>* %3, align 4
-  %5 = sub nsw <8 x i16> %2, %4
-  %6 = icmp sgt <8 x i16> %5,  <i16 -1, i16 -1,i16 -1, i16 -1,i16 -1, i16 -1,i16 -1, i16 -1>
-  %7 = sub <8 x i16> zeroinitializer, %5
-  %8 = select <8 x i1> %6, <8 x i16> %5, <8 x i16> %7
-  %9 = bitcast i16* %a to <8 x i16>*
-  store <8 x i16> %8, <8 x i16>* %9, align 4
-  ret void
-}
-
-; CHECK: testv4i16
-; CHECK: sabd	v0.4h, v0.4h, v1.4h
-define void @testv4i16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c){
-  %1 = bitcast i16* %b to <4 x i16>*
-  %2 = load <4 x i16>* %1, align 4
-  %3 = bitcast i16* %c to <4 x i16>*
-  %4 = load <4 x i16>* %3, align 4
-  %5 = sub nsw <4 x i16> %2, %4
-  %6 = icmp sgt <4 x i16> %5,  <i16 -1, i16 -1,i16 -1, i16 -1>
-  %7 = sub <4 x i16> zeroinitializer, %5
-  %8 = select <4 x i1> %6, <4 x i16> %5, <4 x i16> %7
-  %9 = bitcast i16* %a to <4 x i16>*
-  store <4 x i16> %8, <4 x i16>* %9, align 4
-  ret void
-}
-
-
-; CHECK: testv16i8
-; CHECK: sabd	v0.16b, v0.16b, v1.16b
-define void @testv16i8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c){
-  %1 = bitcast i8* %b to <16 x i8>*
-  %2 = load <16 x i8>* %1, align 4
-  %3 = bitcast i8* %c to <16 x i8>*
-  %4 = load <16 x i8>* %3, align 4
-  %5 = sub nsw <16 x i8> %2, %4
-  %6 = icmp sgt <16 x i8> %5,  <i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1>
-  %7 = sub <16 x i8> zeroinitializer, %5
-  %8 = select <16 x i1> %6, <16 x i8> %5, <16 x i8> %7
-  %9 = bitcast i8* %a to <16 x i8>*
-  store <16 x i8> %8, <16 x i8>* %9, align 4
-  ret void
-}
-
-; CHECK: testv8i8
-; CHECK: sabd	v0.8b, v0.8b, v1.8b
-define void @testv8i8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c){
-  %1 = bitcast i8* %b to <8 x i8>*
-  %2 = load <8 x i8>* %1, align 4
-  %3 = bitcast i8* %c to <8 x i8>*
-  %4 = load <8 x i8>* %3, align 4
-  %5 = sub nsw <8 x i8> %2, %4
-  %6 = icmp sgt <8 x i8> %5,  <i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1>
-  %7 = sub <8 x i8> zeroinitializer, %5
-  %8 = select <8 x i1> %6, <8 x i8> %5, <8 x i8> %7
-  %9 = bitcast i8* %a to <8 x i8>*
-  store <8 x i8> %8, <8 x i8>* %9, align 4
-  ret void
-}
-
-; CHECK: test_v4f32
-; CHECK: fabd	v0.4s, v0.4s, v1.4s
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-define void @test_v4f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, float* noalias nocapture readonly %c){
-  %1 = bitcast float* %b to <4 x float>*
-  %2 = load <4 x float>* %1
-  %3 = bitcast float* %c to <4 x float>*
-  %4 = load <4 x float>* %3
-  %5 = fsub <4 x float> %2, %4
-  %6 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %5)
-  %7 = bitcast float* %a to <4 x float>*
-  store <4 x float> %6, <4 x float>* %7
-  ret void
-}
-
-; CHECK: test_v2f32
-; CHECK: fabd	v0.2s, v0.2s, v1.2s
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
-define void @test_v2f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, float* noalias nocapture readonly %c){
-  %1 = bitcast float* %b to <2 x float>*
-  %2 = load <2 x float>* %1
-  %3 = bitcast float* %c to <2 x float>*
-  %4 = load <2 x float>* %3
-  %5 = fsub <2 x float> %2, %4
-  %6 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %5)
-  %7 = bitcast float* %a to <2 x float>*
-  store <2 x float> %6, <2 x float>* %7
-  ret void
-}
-
-; CHECK: test_v2f64
-; CHECK: fabd	v0.2d, v0.2d, v1.2d
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
-define void @test_v2f64(double* noalias nocapture %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %c){
-  %1 = bitcast double* %b to <2 x double>*
-  %2 = load <2 x double>* %1
-  %3 = bitcast double* %c to <2 x double>*
-  %4 = load <2 x double>* %3
-  %5 = fsub <2 x double> %2, %4
-  %6 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %5)
-  %7 = bitcast double* %a to <2 x double>*
-  store <2 x double> %6, <2 x double>* %7
-  ret void
-}
-
-@a = common global float 0.000000e+00
-declare float @fabsf(float)
-; CHECK: test_fabd32
-; CHECK: fabd	s0, s0, s1
-define void @test_fabd32(float %b, float %c) {
-  %1 = fsub float %b, %c
-  %fabsf = tail call float @fabsf(float %1) #0
-  store float %fabsf, float* @a
-  ret void
-}
-
-@d = common global double 0.000000e+00
-declare double @fabs(double)
-; CHECK: test_fabd64
-; CHECK: fabd	d0, d0, d1
-define void @test_fabd64(double %b, double %c) {
-  %1 = fsub double %b, %c
-  %2 = tail call double @fabs(double %1) #0
-  store double %2, double* @d
-  ret void
-}
-
-attributes #0 = { nounwind readnone}
-