From bfcd61b907e1ec7e6b21fafa7c362e3002ddf3c1 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 30 Mar 2010 22:47:00 +0000 Subject: [PATCH] Enable -sse-domain-fix by default. Now with tests! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99954 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetMachine.cpp | 7 +--- test/CodeGen/X86/2009-02-05-CoalescerBug.ll | 6 ++- test/CodeGen/X86/dagcombine-buildvector.ll | 10 +++-- test/CodeGen/X86/gather-addresses.ll | 2 +- test/CodeGen/X86/sse-align-12.ll | 22 +++++++--- test/CodeGen/X86/sse-align-6.ll | 2 +- test/CodeGen/X86/sse3.ll | 8 ++-- test/CodeGen/X86/vec_compare.ll | 6 +-- test/CodeGen/X86/widen_cast-2.ll | 2 +- test/CodeGen/X86/widen_load-2.ll | 46 ++++++++++----------- 10 files changed, 60 insertions(+), 51 deletions(-) diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 06a481de258..c608e56c8fb 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -23,11 +23,6 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::opt<bool> -SSEDomainFix("sse-domain-fix", - cl::desc("Enable fixing of SSE execution domain"), - cl::init(false), cl::Hidden); - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -177,7 +172,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { + if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { PM.add(createSSEDomainFixPass()); return true; } diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll index 0ffa8fdc30d..a46a20b1da6 100644 --- a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll +++ b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s 
-march=x86 -mattr=+sse2,-sse41 | grep movss | count 2 -; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4 +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t +; RUN: grep movss %t | count 2 +; RUN: grep movaps %t | count 2 +; RUN: grep movdqa %t | count 2 define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind { newFuncRoot: diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll index c0ee2ac3386..2264dc870a7 100644 --- a/test/CodeGen/X86/dagcombine-buildvector.ll +++ b/test/CodeGen/X86/dagcombine-buildvector.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t -; RUN: grep unpcklpd %t | count 1 -; RUN: grep movapd %t | count 1 -; RUN: grep movaps %t | count 1 +; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s ; Shows a dag combine bug that will generate an illegal build vector ; with v2i64 build_vector i32, i32. +; CHECK: _test: +; CHECK: unpcklpd +; CHECK: movapd define void @test(<2 x double>* %dst, <4 x double> %src) nounwind { entry: %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 > @@ -13,6 +13,8 @@ entry: ret void } +; CHECK: _test2: +; CHECK: movdqa define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind { entry: %tmp1 = load <4 x i16>* %src diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index 3e730de0a8e..134ee28df6c 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -5,7 +5,7 @@ ; bounce the vector off of cache rather than shuffling each individual ; element out of the index vector. 
-; CHECK: pand (%rdx), %xmm0 +; CHECK: andps (%rdx), %xmm0 ; CHECK: movaps %xmm0, -24(%rsp) ; CHECK: movslq -24(%rsp), %rax ; CHECK: movsd (%rdi,%rax,8), %xmm0 diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll index 4f025b916fd..b31a02eea29 100644 --- a/test/CodeGen/X86/sse-align-12.ll +++ b/test/CodeGen/X86/sse-align-12.ll @@ -1,10 +1,8 @@ -; RUN: llc < %s -march=x86-64 > %t -; RUN: grep unpck %t | count 2 -; RUN: grep shuf %t | count 2 -; RUN: grep ps %t | count 4 -; RUN: grep pd %t | count 4 -; RUN: grep movup %t | count 4 +; RUN: llc < %s -march=x86-64 | FileCheck %s +; CHECK: _a: +; CHECK: movdqu +; CHECK: pshufd define <4 x float> @a(<4 x float>* %y) nounwind { %x = load <4 x float>* %y, align 4 %a = extractelement <4 x float> %x, i32 0 @@ -17,6 +15,10 @@ define <4 x float> @a(<4 x float>* %y) nounwind { %s = insertelement <4 x float> %r, float %a, i32 3 ret <4 x float> %s } + +; CHECK: _b: +; CHECK: movups +; CHECK: unpckhps define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind { %x = load <4 x float>* %y, align 4 %a = extractelement <4 x float> %x, i32 2 @@ -29,6 +31,10 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind { %s = insertelement <4 x float> %r, float %b, i32 3 ret <4 x float> %s } + +; CHECK: _c: +; CHECK: movupd +; CHECK: shufpd define <2 x double> @c(<2 x double>* %y) nounwind { %x = load <2 x double>* %y, align 8 %a = extractelement <2 x double> %x, i32 0 @@ -37,6 +43,10 @@ define <2 x double> @c(<2 x double>* %y) nounwind { %r = insertelement <2 x double> %p, double %a, i32 1 ret <2 x double> %r } + +; CHECK: _d: +; CHECK: movupd +; CHECK: unpckhpd define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind { %x = load <2 x double>* %y, align 8 %a = extractelement <2 x double> %x, i32 1 diff --git a/test/CodeGen/X86/sse-align-6.ll b/test/CodeGen/X86/sse-align-6.ll index 0bbf4228a40..fcea1b102a2 100644 --- a/test/CodeGen/X86/sse-align-6.ll +++ b/test/CodeGen/X86/sse-align-6.ll @@ 
-1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep movups | count 1 +; RUN: llc < %s -march=x86-64 | grep movdqu | count 1 define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind { %t = load <2 x i64>* %p, align 8 diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index e9c2c01a9e4..b969ecb4142 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -20,7 +20,7 @@ entry: ; X64: pshuflw $0, %xmm0, %xmm0 ; X64: xorl %eax, %eax ; X64: pinsrw $0, %eax, %xmm0 -; X64: movaps %xmm0, (%rdi) +; X64: movdqa %xmm0, (%rdi) ; X64: ret } @@ -32,7 +32,7 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; X64: t1: ; X64: movl (%rsi), %eax -; X64: movaps (%rdi), %xmm0 +; X64: movdqa (%rdi), %xmm0 ; X64: pinsrw $0, %eax, %xmm0 ; X64: ret } @@ -66,7 +66,7 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind { ; X64: pshufhw $100, %xmm0, %xmm2 ; X64: pinsrw $1, %eax, %xmm2 ; X64: pextrw $1, %xmm0, %eax -; X64: movaps %xmm2, %xmm0 +; X64: movdqa %xmm2, %xmm0 ; X64: pinsrw $4, %eax, %xmm0 ; X64: ret } @@ -122,7 +122,7 @@ define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind { ; X64: t8: ; X64: pshuflw $-58, (%rsi), %xmm0 ; X64: pshufhw $-58, %xmm0, %xmm0 -; X64: movaps %xmm0, (%rdi) +; X64: movdqa %xmm0, (%rdi) ; X64: ret } diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index c8c7257cbb9..39c9b770d5f 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -15,7 +15,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK: test2: ; CHECK: pcmp ; CHECK: pcmp -; CHECK: xorps +; CHECK: pxor ; CHECK: ret %C = icmp sge <4 x i32> %A, %B %D = sext <4 x i1> %C to <4 x i32> @@ -25,7 +25,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind { define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK: test3: ; CHECK: pcmpgtd -; CHECK: movaps +; CHECK: movdqa ; CHECK: ret %C = icmp slt <4 x i32> %A, %B %D = sext <4 x i1> %C to <4 x i32> 
@@ -34,7 +34,7 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind { define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK: test4: -; CHECK: movaps +; CHECK: movdqa ; CHECK: pcmpgtd ; CHECK: ret %C = icmp ugt <4 x i32> %A, %B diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll index 1e626a2f882..14e8f756248 100644 --- a/test/CodeGen/X86/widen_cast-2.ll +++ b/test/CodeGen/X86/widen_cast-2.ll @@ -2,7 +2,7 @@ ; CHECK: pextrd ; CHECK: pextrd ; CHECK: movd -; CHECK: movaps +; CHECK: movdqa ; bitcast v14i16 to v7i32 diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index 58b557a7915..658c05143e8 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -5,7 +5,7 @@ %i32vec3 = type <3 x i32> define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { -; CHECK: movaps +; CHECK: movdqa ; CHECK: paddd ; CHECK: pextrd ; CHECK: movq @@ -33,13 +33,13 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) { %i32vec7 = type <7 x i32> define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) { -; CHECK: movaps -; CHECK: movaps +; CHECK: movdqa +; CHECK: movdqa ; CHECK: paddd ; CHECK: paddd ; CHECK: pextrd ; CHECK: movq -; CHECK: movaps +; CHECK: movdqa %a = load %i32vec7* %ap, align 16 %b = load %i32vec7* %bp, align 16 %x = add %i32vec7 %a, %b @@ -49,15 +49,15 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) { %i32vec12 = type <12 x i32> define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) { -; CHECK: movaps -; CHECK: movaps -; CHECK: movaps +; CHECK: movdqa +; CHECK: movdqa +; CHECK: movdqa ; CHECK: paddd ; CHECK: paddd ; CHECK: paddd -; CHECK: movaps -; CHECK: movaps -; CHECK: movaps +; CHECK: movdqa +; CHECK: movdqa +; CHECK: movdqa %a = load %i32vec12* %ap, align 16 %b = load %i32vec12* %bp, align 16 %x = add %i32vec12 %a, %b @@ -68,7 +68,7 @@ define void 
@add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) { %i16vec3 = type <3 x i16> define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind { -; CHECK: movaps +; CHECK: movdqa ; CHECK: paddw ; CHECK: movd ; CHECK: pextrw @@ -81,7 +81,7 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp %i16vec4 = type <4 x i16> define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind { -; CHECK: movaps +; CHECK: movdqa ; CHECK: paddw ; CHECK: movq %a = load %i16vec4* %ap, align 16 @@ -93,12 +93,12 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp %i16vec12 = type <12 x i16> define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind { -; CHECK: movaps -; CHECK: movaps +; CHECK: movdqa +; CHECK: movdqa ; CHECK: paddw ; CHECK: paddw ; CHECK: movq -; CHECK: movaps +; CHECK: movdqa %a = load %i16vec12* %ap, align 16 %b = load %i16vec12* %bp, align 16 %x = add %i16vec12 %a, %b @@ -108,15 +108,15 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %i16vec18 = type <18 x i16> define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind { -; CHECK: movaps -; CHECK: movaps -; CHECK: movaps +; CHECK: movdqa +; CHECK: movdqa +; CHECK: movdqa ; CHECK: paddw ; CHECK: paddw ; CHECK: paddw ; CHECK: movd -; CHECK: movaps -; CHECK: movaps +; CHECK: movdqa +; CHECK: movdqa %a = load %i16vec18* %ap, align 16 %b = load %i16vec18* %bp, align 16 %x = add %i16vec18 %a, %b @@ -127,7 +127,7 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %i8vec3 = type <3 x i8> define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind { -; CHECK: movaps +; CHECK: movdqa ; CHECK: paddb ; CHECK: pextrb ; CHECK: movb @@ -140,8 +140,8 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no 
%i8vec31 = type <31 x i8> define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind { -; CHECK: movaps -; CHECK: movaps +; CHECK: movdqa +; CHECK: movdqa ; CHECK: paddb ; CHECK: paddb ; CHECK: movq