From 84c7078ddcf49a16a1af83b9638a21c2d166368f Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Thu, 2 Oct 2014 07:30:24 +0000
Subject: [PATCH] [x86] Merge the bitwise operation shuffle combining into the
 common test file, adding assertions across the ISA variants for it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218858 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/combine-vec-shuffle.ll      | 253 ----------
 test/CodeGen/X86/vector-shuffle-combining.ll | 468 +++++++++++++++++++
 2 files changed, 468 insertions(+), 253 deletions(-)
 delete mode 100644 test/CodeGen/X86/combine-vec-shuffle.ll

diff --git a/test/CodeGen/X86/combine-vec-shuffle.ll b/test/CodeGen/X86/combine-vec-shuffle.ll
deleted file mode 100644
index 9e6ab892713..00000000000
--- a/test/CodeGen/X86/combine-vec-shuffle.ll
+++ /dev/null
@@ -1,253 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
-
-; Verify that the DAGCombiner correctly folds according to the following rules:
-
-; fold (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
-; fold (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
-; fold (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
-
-; fold (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
-; fold (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
-; fold (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
-
-
-
-define <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test1
-; CHECK-NOT: pshufd
-; CHECK: pand
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test2
-; CHECK-NOT: pshufd
-; CHECK: por
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test3
-; CHECK-NOT: pshufd
-; CHECK: pxor
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test4
-; CHECK-NOT: pshufd
-; CHECK: pand
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test5
-; CHECK-NOT: pshufd
-; CHECK: por
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test6
-; CHECK-NOT: pshufd
-; CHECK: pxor
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-; Verify that DAGCombiner moves the shuffle after the xor/and/or even if shuffles
-; are not performing a swizzle operations.
-
-define <4 x i32> @test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test1b
-; CHECK-NOT: blendps
-; CHECK: andps
-; CHECK-NEXT: blendps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test2b
-; CHECK-NOT: blendps
-; CHECK: orps
-; CHECK-NEXT: blendps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test3b
-; CHECK-NOT: blendps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: blendps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test4b
-; CHECK-NOT: blendps
-; CHECK: andps
-; CHECK-NEXT: blendps
-; CHECK: ret
-
-
-define <4 x i32> @test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test5b
-; CHECK-NOT: blendps
-; CHECK: orps
-; CHECK-NEXT: blendps
-; CHECK: ret
-
-
-define <4 x i32> @test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test6b
-; CHECK-NOT: blendps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: blendps
-; CHECK: ret
-
-define <4 x i32> @test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test1c
-; CHECK-NOT: shufps
-; CHECK: andps
-; CHECK-NEXT: shufps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test2c
-; CHECK-NOT: shufps
-; CHECK: orps
-; CHECK-NEXT: shufps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test3c
-; CHECK-NOT: shufps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: shufps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test4c
-; CHECK-NOT: shufps
-; CHECK: andps
-; CHECK-NEXT: shufps
-; CHECK: ret
-
-
-define <4 x i32> @test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test5c
-; CHECK-NOT: shufps
-; CHECK: orps
-; CHECK-NEXT: shufps
-; CHECK: ret
-
-
-define <4 x i32> @test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test6c
-; CHECK-NOT: shufps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: shufps
-; CHECK: ret
-
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
index 063dba73cd7..7255d8f7bc7 100644
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -159,3 +159,471 @@ entry:
   %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
   ret <8 x i16> %d
 }
+
+define <4 x i32> @combine_bitwise_ops_test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test1:
+; SSE: # BB#0:
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test1:
+; AVX: # BB#0:
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test2:
+; SSE: # BB#0:
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test2:
+; AVX: # BB#0:
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test3:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test3:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test4:
+; SSE: # BB#0:
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test4:
+; AVX: # BB#0:
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test5:
+; SSE: # BB#0:
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test5:
+; AVX: # BB#0:
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test6:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test6:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+
+; Verify that DAGCombiner moves the shuffle after the xor/and/or even if the
+; shuffles are not performing a swizzle operation.
+
+define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test1b:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test1b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test1b:
+; SSE41: # BB#0:
+; SSE41-NEXT: andps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test1b:
+; AVX1: # BB#0:
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test1b:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test2b:
+; SSE2: # BB#0:
+; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test2b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test2b:
+; SSE41: # BB#0:
+; SSE41-NEXT: orps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test2b:
+; AVX1: # BB#0:
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test2b:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test3b:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm0
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test3b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm0
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test3b:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test3b:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test3b:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test4b:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test4b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test4b:
+; SSE41: # BB#0:
+; SSE41-NEXT: andps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test4b:
+; AVX1: # BB#0:
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test4b:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX2-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test5b:
+; SSE2: # BB#0:
+; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test5b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test5b:
+; SSE41: # BB#0:
+; SSE41-NEXT: orps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test5b:
+; AVX1: # BB#0:
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test5b:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX2-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test6b:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm0
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test6b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm0
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test6b:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; SSE41-NEXT: movaps %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test6b:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test6b:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; AVX2-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test1c:
+; SSE: # BB#0:
+; SSE-NEXT: andps %xmm1, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test1c:
+; AVX: # BB#0:
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test2c:
+; SSE: # BB#0:
+; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test2c:
+; AVX: # BB#0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test3c:
+; SSE: # BB#0:
+; SSE-NEXT: xorps %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test3c:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test4c:
+; SSE: # BB#0:
+; SSE-NEXT: andps %xmm1, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test4c:
+; AVX: # BB#0:
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test5c:
+; SSE: # BB#0:
+; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test5c:
+; AVX: # BB#0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test6c:
+; SSE: # BB#0:
+; SSE-NEXT: xorps %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_bitwise_ops_test6c:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,3]
+; AVX-NEXT: retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
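
The fold these tests exercise can be illustrated with a small standalone function; this is only a sketch for illustration and is not part of the patch. The shuffle masks below are invented for the example (the masks used by the tests themselves are not reproduced here), and the function name is hypothetical. It demonstrates the first rule from the deleted file's header comment, fold (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C): when both AND operands pull the same lanes from %c, the lanes taken from %c satisfy x AND x == x, so the AND can be done on %a and %b first and the single shuffle applied afterwards. Feeding this through llc -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 (the RUN line of the removed test) should show one pand/andps followed by one shuffle rather than two shuffles.

; Illustrative only; mask values are made up for this sketch.
define <4 x i32> @fold_illustration(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
  ; Both shuffles take lanes 0 and 2 from their first operand and lanes 1 and 3 from %c.
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ; Equivalent after the combine:
  ;   %ab  = and <4 x i32> %a, %b
  ;   %res = shufflevector <4 x i32> %ab, <4 x i32> %c, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}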