From 138a5c66b9ccaded2ee5d63b96f69349c098e49a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 2 Dec 2011 07:16:01 +0000 Subject: [PATCH] Add instruction selection support for horizontal add/sub of 256-bit floating point vectors. Also add the test case for 256-bit integer vectors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145680 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 +- test/CodeGen/X86/avx2-phaddsub.ll | 73 ++++++++++++++++++++++++ test/CodeGen/X86/haddsub.ll | 91 ++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/avx2-phaddsub.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d2130814872..f1683ae7194 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14443,7 +14443,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14457,7 +14458,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. - if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, false)) return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll new file mode 100644 index 00000000000..4eac71d08b4 --- /dev/null +++ b/test/CodeGen/X86/avx2-phaddsub.ll @@ -0,0 +1,73 @@ +; RUN: llc < %s -march=x86-64 -mattr=+avx2 | FileCheck %s + +; CHECK: phaddw1: +; CHECK: vphaddw +define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) { + %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> + %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> + %r = add <16 x i16> %a, %b + ret <16 x i16> %r +} + +; CHECK: phaddw2: +; CHECK: vphaddw +define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) { + %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> + %b = shufflevector <16 x i16> %y, <16 x i16> %x, <16 x i32> + %r = add <16 x i16> %a, %b + ret <16 x i16> %r +} + +; CHECK: phaddd1: +; CHECK: vphaddd +define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) { + %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> + %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> + %r = add <8 x i32> %a, %b + ret <8 x i32> %r +} + +; CHECK: phaddd2: +; CHECK: vphaddd +define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) { + %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> + %b = shufflevector <8 x i32> %y, <8 x i32> %x, <8 x i32> + %r = add <8 x i32> %a, %b + ret <8 x i32> %r +} + +; CHECK: phaddd3: +; CHECK: vphaddd +define <8 x i32> @phaddd3(<8 x i32> %x) { + %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> + %b = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> + %r = add <8 x i32> %a, %b + ret <8 x i32> %r +} + +; CHECK: phsubw1: +; CHECK: vphsubw +define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) { + %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> + %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> + %r = sub <16 x i16> %a, %b + ret <16 x i16> %r +} + +; CHECK: phsubd1: +; CHECK: vphsubd +define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) { + %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> + %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> + %r = sub <8 x i32> %a, %b + ret <8 x i32> %r +} + +; CHECK: phsubd2: +; CHECK: vphsubd +define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) { + %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> + %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> + %r = sub <8 x i32> %a, %b + ret <8 x i32> %r +} diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll index 91758ead636..5f1f4fd8f76 100644 --- a/test/CodeGen/X86/haddsub.ll +++ b/test/CodeGen/X86/haddsub.ll @@ -192,3 +192,94 @@ define <4 x float> @hsubps4(<4 x float> %x) { %r = fsub <4 x float> %a, %b ret <4 x float> %r } + +; SSE3: vhaddps1: +; SSE3-NOT: vhaddps +; SSE3: haddps +; SSE3: haddps +; AVX: vhaddps1: +; AVX: vhaddps +define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) { + %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> + %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> + %r = fadd <8 x float> %a, %b + ret <8 x float> %r +} + +; SSE3: vhaddps2: +; SSE3-NOT: vhaddps +; SSE3: haddps +; SSE3: haddps +; AVX: vhaddps2: +; AVX: vhaddps +define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) { + %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> + %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> + %r = fadd <8 x float> %a, %b + ret <8 x float> %r +} + +; SSE3: vhaddps3: +; SSE3-NOT: vhaddps +; SSE3: haddps +; SSE3: haddps +; AVX: vhaddps3: +; AVX: vhaddps +define <8 x float> @vhaddps3(<8 x float> %x) { + %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> + %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> + %r = fadd <8 x float> %a, %b + ret <8 x float> %r +} + +; SSE3: vhsubps1: +; SSE3-NOT: vhsubps +; SSE3: hsubps +; SSE3: hsubps +; AVX: vhsubps1: +; AVX: vhsubps +define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) { + %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> + %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> + %r = fsub <8 x float> %a, %b + ret <8 x float> %r +} + +; SSE3: vhsubps3: +; SSE3-NOT: vhsubps +; SSE3: hsubps +; SSE3: hsubps +; AVX: vhsubps3: +; AVX: vhsubps +define <8 x float> @vhsubps3(<8 x float> %x) { + %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> + %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> + %r = fsub <8 x float> %a, %b + ret <8 x float> %r +} + +; SSE3: vhaddpd1: +; SSE3-NOT: vhaddpd +; SSE3: haddpd +; SSE3: haddpd +; AVX: vhaddpd1: +; AVX: vhaddpd +define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) { + %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> + %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> + %r = fadd <4 x double> %a, %b + ret <4 x double> %r +} + +; SSE3: vhsubpd1: +; SSE3-NOT: vhsubpd +; SSE3: hsubpd +; SSE3: hsubpd +; AVX: vhsubpd1: +; AVX: vhsubpd +define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) { + %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> + %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> + %r = fsub <4 x double> %a, %b + ret <4 x double> %r +}