From 5a37cccc7ee4983f8c555711dc82b26fce67cecc Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Wed, 23 Jul 2014 09:11:48 +0000
Subject: [PATCH] [x86] Clean up a test case to use check labels and spell out
 the exact instruction sequences with CHECK-NEXT for these test cases.

This notably exposes how absolutely horrible the generated code is for
several of these test cases, and will make it easy to see any future
updates to the test as our vector instruction selection gets better.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213732 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/widen_load-2.ll | 190 +++++++++++++++++++------------
 1 file changed, 116 insertions(+), 74 deletions(-)

diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 41bea859f47..0c2b2efc99e 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -4,12 +4,12 @@
 ;

 %i32vec3 = type <3 x i32>
-; CHECK: add3i32
 define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
-; CHECK: movdqa
-; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movq
+; CHECK-LABEL: add3i32:
+; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
+; CHECK-NEXT: pextrd $2, %[[R0]], 8(%{{.*}})
+; CHECK-NEXT: movq %[[R0]], (%{{.*}})
 %a = load %i32vec3* %ap, align 16
 %b = load %i32vec3* %bp, align 16
 %x = add %i32vec3 %a, %b
@@ -17,15 +17,15 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
 ret void
 }

-; CHECK: add3i32_2
 define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
-; CHECK: movq
-; CHECK: pinsrd
-; CHECK: movq
-; CHECK: pinsrd
-; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movq
+; CHECK-LABEL: add3i32_2:
+; CHECK: movq (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: pinsrd $2, 8(%{{.*}}), %[[R0]]
+; CHECK-NEXT: movq (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: pinsrd $2, 8(%{{.*}}), %[[R1]]
+; CHECK-NEXT: paddd %[[R0]], %[[R1]]
+; CHECK-NEXT: pextrd $2, %[[R1]], 8(%{{.*}})
+; CHECK-NEXT: movq %[[R1]], (%{{.*}})
 %a = load %i32vec3* %ap, align 8
 %b = load %i32vec3* %bp, align 8
 %x = add %i32vec3 %a, %b
@@ -34,15 +34,15 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
 }

 %i32vec7 = type <7 x i32>
-; CHECK: add7i32
 define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddd
-; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movq
-; CHECK: movdqa
+; CHECK-LABEL: add7i32:
+; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
+; CHECK-NEXT: paddd 16(%{{.*}}), %[[R1]]
+; CHECK-NEXT: pextrd $2, %[[R1]], 24(%{{.*}})
+; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
+; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
 %a = load %i32vec7* %ap, align 16
 %b = load %i32vec7* %bp, align 16
 %x = add %i32vec7 %a, %b
@@ -50,18 +50,18 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
 ret void
 }

-; CHECK: add12i32
 %i32vec12 = type <12 x i32>
 define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddd
-; CHECK: paddd
-; CHECK: paddd
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: movdqa
+; CHECK-LABEL: add12i32:
+; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: movdqa 32(%{{.*}}), %[[R2:xmm[0-9]+]]
+; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
+; CHECK-NEXT: paddd 16(%{{.*}}), %[[R1]]
+; CHECK-NEXT: paddd 32(%{{.*}}), %[[R2]]
+; CHECK-NEXT: movdqa %[[R2]], 32(%{{.*}})
+; CHECK-NEXT: movdqa %[[R1]], 16(%{{.*}})
+; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
 %a = load %i32vec12* %ap, align 16
 %b = load %i32vec12* %bp, align 16
 %x = add %i32vec12 %a, %b
@@ -70,11 +70,18 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
 }


-; CHECK: add3i16
 %i16vec3 = type <3 x i16>
 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: paddd
-; CHECK: ret
+; CHECK-LABEL: add3i16:
+; CHECK: pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: paddd %[[R0]], %[[R1]]
+; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
+; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
+; CHECK-NEXT: movd %[[R0]], %r[[R3:[abcd]]]x
+; CHECK-NEXT: movd %r[[R3]]x, %[[R0]]
+; CHECK-NEXT: pextrw $4, %[[R1]], 4(%{{.*}})
+; CHECK-NEXT: movd %[[R0]], (%{{.*}})
 %a = load %i16vec3* %ap, align 16
 %b = load %i16vec3* %bp, align 16
 %x = add %i16vec3 %a, %b
@@ -82,11 +89,14 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
 ret void
 }

-; CHECK: add4i16
 %i16vec4 = type <4 x i16>
 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: paddd
-; CHECK: movq
+; CHECK-LABEL: add4i16:
+; CHECK: pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: paddd %[[R0]], %[[R1]]
+; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
+; CHECK-NEXT: movq %[[R1]], (%{{.*}})
 %a = load %i16vec4* %ap, align 16
 %b = load %i16vec4* %bp, align 16
 %x = add %i16vec4 %a, %b
@@ -94,15 +104,15 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
 ret void
 }

-; CHECK: add12i16
 %i16vec12 = type <12 x i16>
 define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: paddw
-; CHECK: movq
-; CHECK: movdqa
+; CHECK-LABEL: add12i16:
+; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: paddw (%{{.*}}), %[[R0]]
+; CHECK-NEXT: paddw 16(%{{.*}}), %[[R1]]
+; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
+; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
 %a = load %i16vec12* %ap, align 16
 %b = load %i16vec12* %bp, align 16
 %x = add %i16vec12 %a, %b
@@ -110,18 +120,18 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
 ret void
 }

-; CHECK: add18i16
 %i16vec18 = type <18 x i16>
 define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: paddw
-; CHECK: paddw
-; CHECK: movd
-; CHECK: movdqa
-; CHECK: movdqa
+; CHECK-LABEL: add18i16:
+; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: movdqa 32(%{{.*}}), %[[R2:xmm[0-9]+]]
+; CHECK-NEXT: paddw (%{{.*}}), %[[R0]]
+; CHECK-NEXT: paddw 16(%{{.*}}), %[[R1]]
+; CHECK-NEXT: paddw 32(%{{.*}}), %[[R2]]
+; CHECK-NEXT: movd %[[R2]], 32(%{{.*}})
+; CHECK-NEXT: movdqa %[[R1]], 16(%{{.*}})
+; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
 %a = load %i16vec18* %ap, align 16
 %b = load %i16vec18* %bp, align 16
 %x = add %i16vec18 %a, %b
@@ -130,11 +140,17 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
 }


-; CHECK: add3i8
 %i8vec3 = type <3 x i8>
 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: paddd
-; CHECK: ret
+; CHECK-LABEL: add3i8:
+; CHECK: pmovzxbd (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: pmovzxbd (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: paddd %[[R0]], %[[R1]]
+; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
+; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
+; CHECK-NEXT: movd %[[R0]], %e[[R3:[abcd]]]x
+; CHECK-NEXT: pextrb $8, %[[R1]], 2(%{{.*}})
+; CHECK-NEXT: movw %[[R3]]x, (%{{.*}})
 %a = load %i8vec3* %ap, align 16
 %b = load %i8vec3* %bp, align 16
 %x = add %i8vec3 %a, %b
@@ -142,17 +158,18 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
 ret void
 }

-; CHECK-LABEL: add31i8:
 %i8vec31 = type <31 x i8>
 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddb
-; CHECK: paddb
-; CHECK: pextrb
-; CHECK: pextrw
-; CHECK: movq
-; CHECK: ret
+; CHECK-LABEL: add31i8:
+; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT: movdqa 16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT: paddb (%{{.*}}), %[[R0]]
+; CHECK-NEXT: paddb 16(%{{.*}}), %[[R1]]
+; CHECK-NEXT: pextrb $14, %[[R1]], 30(%{{.*}})
+; CHECK-NEXT: pextrw $6, %[[R1]], 28(%{{.*}})
+; CHECK-NEXT: pextrd $2, %[[R1]], 24(%{{.*}})
+; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
+; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
 %a = load %i8vec31* %ap, align 16
 %b = load %i8vec31* %bp, align 16
 %x = add %i8vec31 %a, %b
@@ -161,14 +178,40 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 }


-; CHECK: rot
 %i8vec3pack = type { <3 x i8>, i8 }
-define %i8vec3pack @rot() nounwind {
-; CHECK: pmovzxbd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
+define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pack* %rot) nounwind {
+; CHECK-LABEL: rot:
+; CHECK: movdqa {{.*}}, %[[CONSTANT0:xmm[0-9]+]]
+; CHECK-NEXT: movdqa {{.*}}, %[[SHUFFLE_MASK:xmm[0-9]+]]
+; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[CONSTANT0]]
+; CHECK-NEXT: movd %[[CONSTANT0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT: movw %[[R0]]x, (%[[PTR0:.*]])
+; CHECK-NEXT: movb $-98, 2(%[[PTR0]])
+; CHECK-NEXT: movdqa {{.*}}, %[[CONSTANT1:xmm[0-9]+]]
+; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[CONSTANT1]]
+; CHECK-NEXT: movd %[[CONSTANT1]], %e[[R1:[abcd]]]x
+; CHECK-NEXT: movw %[[R1]]x, (%[[PTR1:.*]])
+; CHECK-NEXT: movb $1, 2(%[[PTR1]])
+; CHECK-NEXT: pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
+; CHECK-NEXT: pand {{.*}}, %[[X0]]
+; CHECK-NEXT: pextrd $1, %[[X0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT: shrl %e[[R0]]x
+; CHECK-NEXT: movd %[[X0]], %e[[R1:[abcd]]]x
+; CHECK-NEXT: shrl %e[[R1]]x
+; CHECK-NEXT: movd %e[[R1]]x, %[[X1:xmm[0-9]+]]
+; CHECK-NEXT: pinsrd $1, %e[[R0]]x, %[[X1]]
+; CHECK-NEXT: pextrd $2, %[[X0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT: shrl %e[[R0]]x
+; CHECK-NEXT: pinsrd $2, %e[[R0]]x, %[[X1]]
+; CHECK-NEXT: pextrd $3, %[[X0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT: pinsrd $3, %e[[R0]]x, %[[X1]]
+; CHECK-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
+; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X2]]
+; CHECK-NEXT: movd %[[X2]], %e[[R0:[abcd]]]x
+; CHECK-NEXT: pextrb $8, %[[X1]], 2(%{{.*}})
+; CHECK-NEXT: movw %[[R0]]x, (%{{.*}})
+
 entry:
- %X = alloca %i8vec3pack, align 4
- %rot = alloca %i8vec3pack, align 4
- %result = alloca %i8vec3pack, align 4
 %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
 store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
 %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
@@ -180,7 +223,6 @@ entry:
 %shr = lshr <3 x i8> %extractVec, %extractVec3
 %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
 store <3 x i8> %shr, <3 x i8>* %storetmp4
- %tmp5 = load %i8vec3pack* %result
- ret %i8vec3pack %tmp5
+ ret void
 }
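
--
A note on the FileCheck idiom used throughout this patch (this trailer is
commentary, not part of the patch itself): CHECK-LABEL splits the tool's
output at each function label and checks every block in isolation, so a
stray match in one function can no longer satisfy a check meant for
another; CHECK-NEXT requires the instruction to appear on the very next
output line; and a pattern like %[[R0:xmm[0-9]+]] binds a FileCheck
variable to whichever xmm register the register allocator picked, letting
later lines name the same register as %[[R0]]. A minimal sketch of the
shape (the function add2i32 and its instruction sequence are made up for
illustration, not taken from this test):

; CHECK-LABEL: add2i32:
; CHECK: movq (%{{.*}}), %[[R0:xmm[0-9]+]]
; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
; CHECK-NEXT: movq %[[R0]], (%{{.*}})

Because the whole sequence is chained with CHECK-NEXT, any instruction the
compiler adds, drops, or reorders fails the test immediately, which is what
lets future codegen improvements show up as clear deltas to this file.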