Use PMULDQ for v2i64 multiplies when SSE4.1 is available, and add load-folding table entries for PMULDQ and PMULLD.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51489 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman
2008-05-23 17:49:40 +00:00
parent d4083d01d2
commit 0b924dcef8
4 changed files with 45 additions and 7 deletions

32
test/CodeGen/X86/pmul.ll Normal file
View File

@@ -0,0 +1,32 @@
; Codegen test for vector integer multiplies on x86 with the sse41 attribute.
; The run lines below assemble this file, compile it with llc into %t, and
; then require exactly 6 output lines containing "pmul" and exactly 8 output
; lines containing "mov". There are six functions in this file, so the first
; count presumably corresponds to one packed multiply per function; the "mov"
; count bounds how many moves (including spill/reload traffic) are emitted.
; NOTE(review): PMULDQ multiplies only the low 32 bits of each 64-bit lane
; (signed 32x32->64). Confirm that it is a valid lowering for a full
; <2 x i64> mul before relying on the counts checked here.
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 > %t
; RUN: grep pmul %t | count 6
; RUN: grep mov %t | count 8
; @a: multiply each of the four i32 lanes of %i by the constant 117
; (a constant-vector right-hand operand) and return the product.
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
ret <4 x i32> %A
}
; @b: multiply each of the two i64 lanes of %i by the constant 117
; (a constant-vector right-hand operand) and return the product.
define <2 x i64> @b(<2 x i64> %i) nounwind {
%A = mul <2 x i64> %i, < i64 117, i64 117 >
ret <2 x i64> %A
}
; @c: lane-wise multiply of two <4 x i32> arguments, with both operands
; coming straight from the function parameters.
define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind {
%A = mul <4 x i32> %i, %j
ret <4 x i32> %A
}
; @d: lane-wise multiply of two <2 x i64> arguments, with both operands
; coming straight from the function parameters.
define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind {
%A = mul <2 x i64> %i, %j
ret <2 x i64> %A
}
; Use a call to force spills: the functions that follow call @foo before
; multiplying their vector arguments, so those arguments are live across
; the call. Presumably this is what exercises folding a reload from the
; stack into the multiply instruction -- verify against the llc output.
; @foo is only declared here; it takes no arguments and returns nothing.
declare void @foo()
; @e: same <4 x i32> multiply as a plain two-argument vector mul, but
; performed after a call to @foo so that %i and %j are live across the call.
define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind {
; Both vector arguments must survive this call before they are used below.
call void @foo()
%A = mul <4 x i32> %i, %j
ret <4 x i32> %A
}
; @f: same <2 x i64> multiply as a plain two-argument vector mul, but
; performed after a call to @foo so that %i and %j are live across the call.
define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind {
; Both vector arguments must survive this call before they are used below.
call void @foo()
%A = mul <2 x i64> %i, %j
ret <2 x i64> %A
}