mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-01 15:11:24 +00:00
Enable -sse-domain-fix by default. Now with tests!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99954 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
428e152469
commit
bfcd61b907
@ -23,11 +23,6 @@
|
||||
#include "llvm/Target/TargetRegistry.h"
|
||||
using namespace llvm;
|
||||
|
||||
-static cl::opt<bool>
|
||||
-SSEDomainFix("sse-domain-fix",
|
||||
-cl::desc("Enable fixing of SSE execution domain"),
|
||||
-cl::init(false), cl::Hidden);
|
||||
|
||||
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
|
||||
Triple TheTriple(TT);
|
||||
switch (TheTriple.getOS()) {
|
||||
@ -177,7 +172,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
|
||||
|
||||
bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
-if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
|
||||
+if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
|
||||
PM.add(createSSEDomainFixPass());
|
||||
return true;
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 2
|
||||
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
|
||||
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
|
||||
+; RUN: grep movss %t | count 2
|
||||
+; RUN: grep movaps %t | count 2
|
||||
+; RUN: grep movdqa %t | count 2
|
||||
|
||||
define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
|
||||
newFuncRoot:
|
||||
|
@ -1,11 +1,11 @@
|
||||
; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
|
||||
; RUN: grep unpcklpd %t | count 1
|
||||
; RUN: grep movapd %t | count 1
|
||||
; RUN: grep movaps %t | count 1
|
||||
; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
|
||||
|
||||
; Shows a dag combine bug that will generate an illegal build vector
|
||||
; with v2i64 build_vector i32, i32.
|
||||
|
||||
; CHECK: _test:
|
||||
; CHECK: unpcklpd
|
||||
; CHECK: movapd
|
||||
define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
|
||||
entry:
|
||||
%tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
|
||||
@ -13,6 +13,8 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: _test2:
|
||||
; CHECK: movdqa
|
||||
define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
|
||||
entry:
|
||||
%tmp1 = load <4 x i16>* %src
|
||||
|
@ -5,7 +5,7 @@
|
||||
; bounce the vector off of cache rather than shuffling each individual
|
||||
; element out of the index vector.
|
||||
|
||||
; CHECK: pand (%rdx), %xmm0
|
||||
; CHECK: andps (%rdx), %xmm0
|
||||
; CHECK: movaps %xmm0, -24(%rsp)
|
||||
; CHECK: movslq -24(%rsp), %rax
|
||||
; CHECK: movsd (%rdi,%rax,8), %xmm0
|
||||
|
@ -1,10 +1,8 @@
|
||||
; RUN: llc < %s -march=x86-64 > %t
|
||||
; RUN: grep unpck %t | count 2
|
||||
; RUN: grep shuf %t | count 2
|
||||
; RUN: grep ps %t | count 4
|
||||
; RUN: grep pd %t | count 4
|
||||
; RUN: grep movup %t | count 4
|
||||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||
|
||||
; CHECK: _a:
|
||||
; CHECK: movdqu
|
||||
; CHECK: pshufd
|
||||
define <4 x float> @a(<4 x float>* %y) nounwind {
|
||||
%x = load <4 x float>* %y, align 4
|
||||
%a = extractelement <4 x float> %x, i32 0
|
||||
@ -17,6 +15,10 @@ define <4 x float> @a(<4 x float>* %y) nounwind {
|
||||
%s = insertelement <4 x float> %r, float %a, i32 3
|
||||
ret <4 x float> %s
|
||||
}
|
||||
|
||||
; CHECK: _b:
|
||||
; CHECK: movups
|
||||
; CHECK: unpckhps
|
||||
define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
|
||||
%x = load <4 x float>* %y, align 4
|
||||
%a = extractelement <4 x float> %x, i32 2
|
||||
@ -29,6 +31,10 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
|
||||
%s = insertelement <4 x float> %r, float %b, i32 3
|
||||
ret <4 x float> %s
|
||||
}
|
||||
|
||||
; CHECK: _c:
|
||||
; CHECK: movupd
|
||||
; CHECK: shufpd
|
||||
define <2 x double> @c(<2 x double>* %y) nounwind {
|
||||
%x = load <2 x double>* %y, align 8
|
||||
%a = extractelement <2 x double> %x, i32 0
|
||||
@ -37,6 +43,10 @@ define <2 x double> @c(<2 x double>* %y) nounwind {
|
||||
%r = insertelement <2 x double> %p, double %a, i32 1
|
||||
ret <2 x double> %r
|
||||
}
|
||||
|
||||
; CHECK: _d:
|
||||
; CHECK: movupd
|
||||
; CHECK: unpckhpd
|
||||
define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
|
||||
%x = load <2 x double>* %y, align 8
|
||||
%a = extractelement <2 x double> %x, i32 1
|
||||
|
@ -1,4 +1,4 @@
|
||||
-; RUN: llc < %s -march=x86-64 | grep movups | count 1
|
||||
+; RUN: llc < %s -march=x86-64 | grep movdqu | count 1
|
||||
|
||||
define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
|
||||
%t = load <2 x i64>* %p, align 8
|
||||
|
@ -20,7 +20,7 @@ entry:
|
||||
; X64: pshuflw $0, %xmm0, %xmm0
|
||||
; X64: xorl %eax, %eax
|
||||
; X64: pinsrw $0, %eax, %xmm0
|
||||
; X64: movaps %xmm0, (%rdi)
|
||||
; X64: movdqa %xmm0, (%rdi)
|
||||
; X64: ret
|
||||
}
|
||||
|
||||
@ -32,7 +32,7 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
|
||||
|
||||
; X64: t1:
|
||||
; X64: movl (%rsi), %eax
|
||||
; X64: movaps (%rdi), %xmm0
|
||||
; X64: movdqa (%rdi), %xmm0
|
||||
; X64: pinsrw $0, %eax, %xmm0
|
||||
; X64: ret
|
||||
}
|
||||
@ -66,7 +66,7 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||
; X64: pshufhw $100, %xmm0, %xmm2
|
||||
; X64: pinsrw $1, %eax, %xmm2
|
||||
; X64: pextrw $1, %xmm0, %eax
|
||||
; X64: movaps %xmm2, %xmm0
|
||||
; X64: movdqa %xmm2, %xmm0
|
||||
; X64: pinsrw $4, %eax, %xmm0
|
||||
; X64: ret
|
||||
}
|
||||
@ -122,7 +122,7 @@ define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
|
||||
; X64: t8:
|
||||
; X64: pshuflw $-58, (%rsi), %xmm0
|
||||
; X64: pshufhw $-58, %xmm0, %xmm0
|
||||
; X64: movaps %xmm0, (%rdi)
|
||||
; X64: movdqa %xmm0, (%rdi)
|
||||
; X64: ret
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK: test2:
|
||||
; CHECK: pcmp
|
||||
; CHECK: pcmp
|
||||
; CHECK: xorps
|
||||
; CHECK: pxor
|
||||
; CHECK: ret
|
||||
%C = icmp sge <4 x i32> %A, %B
|
||||
%D = sext <4 x i1> %C to <4 x i32>
|
||||
@ -25,7 +25,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK: test3:
|
||||
; CHECK: pcmpgtd
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: ret
|
||||
%C = icmp slt <4 x i32> %A, %B
|
||||
%D = sext <4 x i1> %C to <4 x i32>
|
||||
@ -34,7 +34,7 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
|
||||
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK: test4:
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: pcmpgtd
|
||||
; CHECK: ret
|
||||
%C = icmp ugt <4 x i32> %A, %B
|
||||
|
@ -2,7 +2,7 @@
|
||||
; CHECK: pextrd
|
||||
; CHECK: pextrd
|
||||
; CHECK: movd
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
|
||||
|
||||
; bitcast v14i16 to v7i32
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
%i32vec3 = type <3 x i32>
|
||||
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddd
|
||||
; CHECK: pextrd
|
||||
; CHECK: movq
|
||||
@ -33,13 +33,13 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
|
||||
|
||||
%i32vec7 = type <7 x i32>
|
||||
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddd
|
||||
; CHECK: paddd
|
||||
; CHECK: pextrd
|
||||
; CHECK: movq
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
%a = load %i32vec7* %ap, align 16
|
||||
%b = load %i32vec7* %bp, align 16
|
||||
%x = add %i32vec7 %a, %b
|
||||
@ -49,15 +49,15 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
|
||||
|
||||
%i32vec12 = type <12 x i32>
|
||||
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddd
|
||||
; CHECK: paddd
|
||||
; CHECK: paddd
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
%a = load %i32vec12* %ap, align 16
|
||||
%b = load %i32vec12* %bp, align 16
|
||||
%x = add %i32vec12 %a, %b
|
||||
@ -68,7 +68,7 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
|
||||
|
||||
%i16vec3 = type <3 x i16>
|
||||
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddw
|
||||
; CHECK: movd
|
||||
; CHECK: pextrw
|
||||
@ -81,7 +81,7 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
|
||||
|
||||
%i16vec4 = type <4 x i16>
|
||||
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddw
|
||||
; CHECK: movq
|
||||
%a = load %i16vec4* %ap, align 16
|
||||
@ -93,12 +93,12 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
|
||||
|
||||
%i16vec12 = type <12 x i16>
|
||||
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddw
|
||||
; CHECK: paddw
|
||||
; CHECK: movq
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
%a = load %i16vec12* %ap, align 16
|
||||
%b = load %i16vec12* %bp, align 16
|
||||
%x = add %i16vec12 %a, %b
|
||||
@ -108,15 +108,15 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
|
||||
|
||||
%i16vec18 = type <18 x i16>
|
||||
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddw
|
||||
; CHECK: paddw
|
||||
; CHECK: paddw
|
||||
; CHECK: movd
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
%a = load %i16vec18* %ap, align 16
|
||||
%b = load %i16vec18* %bp, align 16
|
||||
%x = add %i16vec18 %a, %b
|
||||
@ -127,7 +127,7 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
|
||||
|
||||
%i8vec3 = type <3 x i8>
|
||||
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddb
|
||||
; CHECK: pextrb
|
||||
; CHECK: movb
|
||||
@ -140,8 +140,8 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
|
||||
|
||||
%i8vec31 = type <31 x i8>
|
||||
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
|
||||
; CHECK: movaps
|
||||
; CHECK: movaps
|
||||
; CHECK: movdqa
|
||||
; CHECK: movdqa
|
||||
; CHECK: paddb
|
||||
; CHECK: paddb
|
||||
; CHECK: movq
|
||||
|
Loading…
Reference in New Issue
Block a user